42 files changed, 290 insertions, 334 deletions
diff --git a/CHANGES b/CHANGES
@@ -11,7 +11,7 @@ Version 1.7
 (under development)
 
 - Dropped Python 2.4 and 2.5 compatibility. This is in favor of single-source
-  compatibility between Python 2.6, 2.7 and 3.2+.
+  compatibility between Python 2.6, 2.7 and 3.3+.
 
 - Lexers added:
diff --git a/Makefile b/Makefile
@@ -53,7 +53,7 @@ reindent:
 	@$(PYTHON) scripts/reindent.py -r -B .
 
 test:
-	@$(PYTHON) tests/run.py $(TESTS)
+	@$(PYTHON) tests/run.py $(TEST)
 
 test-coverage:
-	@$(PYTHON) tests/run.py -C $(TESTS)
+	@$(PYTHON) tests/run.py -C $(TEST)
diff --git a/docs/generate.py b/docs/generate.py
index 21d261a3..cd9438a8 100755
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -48,7 +48,7 @@ def generate_lexer_docs():
     modules = {}
     moduledocstrings = {}
-    for classname, data in sorted(LEXERS.iteritems(), key=lambda x: x[0]):
+    for classname, data in sorted(LEXERS.items(), key=lambda x: x[0]):
         module = data[0]
         mod = __import__(module, None, None, [classname])
         cls = getattr(mod, classname)
@@ -63,18 +63,21 @@ def generate_lexer_docs():
         if module not in moduledocstrings:
             moduledocstrings[module] = mod.__doc__
 
-    for module, lexers in sorted(modules.iteritems(), key=lambda x: x[0]):
+    for module, lexers in sorted(modules.items(), key=lambda x: x[0]):
         heading = moduledocstrings[module].splitlines()[4].strip().rstrip('.')
         out.append('\n' + heading + '\n' + '-'*len(heading) + '\n')
         for data in lexers:
             out.append(LEXERDOC % data)
-    return ''.join(out).decode('utf-8')
+    s = ''.join(out)
+    if isinstance(s, bytes):
+        s = s.decode('utf-8')
+    return s
 
 def generate_formatter_docs():
     from pygments.formatters import FORMATTERS
     out = []
-    for cls, data in sorted(FORMATTERS.iteritems(),
+    for cls, data in sorted(FORMATTERS.items(),
                             key=lambda x: x[0].__name__):
         heading = cls.__name__
         out.append('`' + heading + '`\n' + '-'*(2+len(heading)) + '\n')
@@ -85,24 +88,30 @@ def generate_formatter_docs():
     ''' % (', '.join(data[1]) or 'None',
            ', '.join(data[2]).replace('*', '\\*') or 'None'))
-    return ''.join(out).decode('utf-8')
+    s = ''.join(out)
+    if isinstance(s, bytes):
+        s = s.decode('utf-8')
+    return s
 
 def generate_filter_docs():
     from pygments.filters import FILTERS
     out = []
-    for name, cls in FILTERS.iteritems():
+    for name, cls in FILTERS.items():
         out.append('''
 `%s`
 %s
     :Name: %s
 ''' % (cls.__name__, cls.__doc__, name))
-    return ''.join(out).decode('utf-8')
+    s = ''.join(out)
+    if isinstance(s, bytes):
+        s = s.decode('utf-8')
+    return s
 
 def generate_changelog():
     fn = os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
                                       'CHANGES'))
-    f = file(fn)
+    f = open(fn)
     result = []
     in_header = False
     header = True
@@ -115,12 +124,15 @@ def generate_changelog():
         else:
             result.append(line.rstrip())
     f.close()
-    return '\n'.join(result).decode('utf-8')
+    s = '\n'.join(result)
+    if isinstance(s, bytes):
+        s = s.decode('utf-8')
+    return s
 
 def generate_authors():
     fn = os.path.abspath(os.path.join(os.path.dirname(__file__), '..',
                                       'AUTHORS'))
-    f = file(fn)
+    f = open(fn, 'rb')
     r = f.read().rstrip().decode('utf-8')
     f.close()
     return r
@@ -410,7 +422,7 @@ def handle_python(filename, fp, dst):
         else:
             return '/docs/%s/' % href
     parts = generate_documentation(content, urlize)
-    result = file(os.path.join(dst, title + '.py'), 'w')
+    result = open(os.path.join(dst, title + '.py'), 'w')
     result.write('# -*- coding: utf-8 -*-\n')
     result.write('"""\n    Pygments Documentation - %s\n' % title)
     result.write('    %s\n\n' % ('~' * (24 + len(title))))
@@ -428,7 +440,7 @@ def handle_html(filename, fp, dst):
     title = os.path.basename(filename)[:-4]
     content = fp.read().decode('utf-8')
     c = generate_documentation(content, (lambda x: './%s.html' % x))
-    result = file(os.path.join(dst, title + '.html'), 'w')
+    result = open(os.path.join(dst, title + '.html'), 'wb')
     c['style'] = STYLESHEET + PYGMENTS_FORMATTER.get_style_defs('.syntax')
     c['generation_date'] = now
     c['file_id'] = title
@@ -448,7 +460,7 @@ def run(handle_file, dst, sources=()):
         if not os.path.isfile(fn):
             continue
         print('Processing %s' % fn)
-        f = open(fn)
+        f = open(fn, 'rb')
         try:
             handle_file(fn, f, dst)
         finally:
diff --git a/external/rst-directive-old.py b/external/rst-directive-old.py
deleted file mode 100644
index 4965576c..00000000
--- a/external/rst-directive-old.py
+++ /dev/null
@@ -1,77 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-    The Pygments reStructuredText directive
-    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    This fragment is a Docutils_ 0.4 directive that renders source code
-    (to HTML only, currently) via Pygments.
-
-    To use it, adjust the options below and copy the code into a module
-    that you import on initialization.  The code then automatically
-    registers a ``sourcecode`` directive that you can use instead of
-    normal code blocks like this::
-
-        .. sourcecode:: python
-
-            My code goes here.
-
-    If you want to have different code styles, e.g. one with line numbers
-    and one without, add formatters with their names in the VARIANTS dict
-    below.  You can invoke them instead of the DEFAULT one by using a
-    directive option::
-
-        .. sourcecode:: python
-            :linenos:
-
-            My code goes here.
-
-    Look at the `directive documentation`_ to get all the gory details.
-
-    .. _Docutils: http://docutils.sf.net/
-    .. _directive documentation:
-       http://docutils.sourceforge.net/docs/howto/rst-directives.html
-
-    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-# Options
-# ~~~~~~~
-
-# Set to True if you want inline CSS styles instead of classes
-INLINESTYLES = False
-
-from pygments.formatters import HtmlFormatter
-
-# The default formatter
-DEFAULT = HtmlFormatter(noclasses=INLINESTYLES)
-
-# Add name -> formatter pairs for every variant you want to use
-VARIANTS = {
-    # 'linenos': HtmlFormatter(noclasses=INLINESTYLES, linenos=True),
-}
-
-
-from docutils import nodes
-from docutils.parsers.rst import directives
-
-from pygments import highlight
-from pygments.lexers import get_lexer_by_name, TextLexer
-
-def pygments_directive(name, arguments, options, content, lineno,
-                       content_offset, block_text, state, state_machine):
-    try:
-        lexer = get_lexer_by_name(arguments[0])
-    except ValueError:
-        # no lexer found - use the text one instead of an exception
-        lexer = TextLexer()
-    # take an arbitrary option if more than one is given
-    formatter = options and VARIANTS[options.keys()[0]] or DEFAULT
-    parsed = highlight(u'\n'.join(content), lexer, formatter)
-    return [nodes.raw('', parsed, format='html')]
-
-pygments_directive.arguments = (1, 0, 1)
-pygments_directive.content = 1
-pygments_directive.options = dict([(key, directives.flag) for key in VARIANTS])
-
-directives.register_directive('sourcecode', pygments_directive)
diff --git a/external/rst-directive.py b/external/rst-directive.py
index f15e7dc8..8ce150c4 100644
--- a/external/rst-directive.py
+++ b/external/rst-directive.py
@@ -75,9 +75,8 @@ class Pygments(Directive):
             # no lexer found - use the text one instead of an exception
             lexer = TextLexer()
         # take an arbitrary option if more than one is given
-        formatter = self.options and VARIANTS[self.options.keys()[0]] or DEFAULT
+        formatter = self.options and VARIANTS[list(self.options)[0]] or DEFAULT
         parsed = highlight(u'\n'.join(self.content), lexer, formatter)
         return [nodes.raw('', parsed, format='html')]
 
 directives.register_directive('sourcecode', Pygments)
-
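The rst-directive change just above is the template for the many dict-API fixes in this commit: on Python 3, dict.keys() returns a view object that cannot be indexed, so VARIANTS[self.options.keys()[0]] raises TypeError. Iterating the dict itself and materializing it with list() behaves identically on both lines. A minimal sketch (the options dict here is hypothetical):

    options = {'linenos': True}
    # Python 2 only -- on Python 3 this raises
    # TypeError: 'dict_keys' object is not subscriptable:
    #   first = options.keys()[0]
    # Portable: a dict iterates over its keys; list() materializes them.
    first = list(options)[0]
    assert first == 'linenos'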
diff --git a/pygments/filters/__init__.py b/pygments/filters/__init__.py
index c33dac7e..4806eeec 100644
--- a/pygments/filters/__init__.py
+++ b/pygments/filters/__init__.py
@@ -16,7 +16,7 @@ from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
     string_to_tokentype
 from pygments.filter import Filter
 from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
-    get_choice_opt, ClassNotFound, OptionError
+    get_choice_opt, ClassNotFound, OptionError, text_type, string_types
 from pygments.plugin import find_plugin_filters
 
@@ -117,7 +117,7 @@ class KeywordCaseFilter(Filter):
     def __init__(self, **options):
         Filter.__init__(self, **options)
         case = get_choice_opt(options, 'case', ['lower', 'upper', 'capitalize'], 'lower')
-        self.convert = getattr(unicode, case)
+        self.convert = getattr(text_type, case)
 
     def filter(self, lexer, stream):
         for ttype, value in stream:
@@ -235,9 +235,11 @@ class VisibleWhitespaceFilter(Filter):
     def __init__(self, **options):
         Filter.__init__(self, **options)
-        for name, default in {'spaces': u'·', 'tabs': u'»', 'newlines': u'¶'}.items():
+        for name, default in [('spaces',   u'·'),
+                              ('tabs',     u'»'),
+                              ('newlines', u'¶')]:
             opt = options.get(name, False)
-            if isinstance(opt, basestring) and len(opt) == 1:
+            if isinstance(opt, string_types) and len(opt) == 1:
                 setattr(self, name, opt)
             else:
                 setattr(self, name, (opt and default or ''))
diff --git a/pygments/formatter.py b/pygments/formatter.py
index e5ad0d25..b16ffee8 100644
--- a/pygments/formatter.py
+++ b/pygments/formatter.py
@@ -11,14 +11,14 @@
 
 import codecs
 
-from pygments.util import get_bool_opt
+from pygments.util import get_bool_opt, string_types
 from pygments.styles import get_style_by_name
 
 __all__ = ['Formatter']
 
 
 def _lookup_style(style):
-    if isinstance(style, basestring):
+    if isinstance(style, string_types):
         return get_style_by_name(style)
     return style
diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py
index f424e1b6..993d2ec2 100644
--- a/pygments/formatters/html.py
+++ b/pygments/formatters/html.py
@@ -14,11 +14,11 @@ from __future__ import print_function
 import os
 import sys
 import os.path
-import StringIO
 
 from pygments.formatter import Formatter
 from pygments.token import Token, Text, STANDARD_TYPES
-from pygments.util import get_bool_opt, get_int_opt, get_list_opt, bytes
+from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
+    StringIO, string_types, iteritems
 
 try:
     import ctags
@@ -455,7 +455,7 @@ class HtmlFormatter(Formatter):
         """
         if arg is None:
             arg = ('cssclass' in self.options and '.'+self.cssclass or '')
-        if isinstance(arg, basestring):
+        if isinstance(arg, string_types):
             args = [arg]
         else:
             args = list(arg)
@@ -469,7 +469,7 @@ class HtmlFormatter(Formatter):
             return ', '.join(tmp)
 
         styles = [(level, ttype, cls, style)
-                  for cls, (style, ttype, level) in self.class2style.iteritems()
+                  for cls, (style, ttype, level) in iteritems(self.class2style)
                   if cls and style]
         styles.sort()
         lines = ['%s { %s } /* %s */' % (prefix(cls), style, repr(ttype)[6:])
@@ -536,7 +536,7 @@ class HtmlFormatter(Formatter):
         yield 0, DOC_FOOTER
 
     def _wrap_tablelinenos(self, inner):
-        dummyoutfile = StringIO.StringIO()
+        dummyoutfile = StringIO()
         lncount = 0
         for t, line in inner:
             if t:
diff --git a/pygments/formatters/img.py b/pygments/formatters/img.py
index 615c722d..cd0debea 100644
--- a/pygments/formatters/img.py
+++ b/pygments/formatters/img.py
@@ -12,8 +12,8 @@ import sys
 
 from pygments.formatter import Formatter
-from pygments.util import get_bool_opt, get_int_opt, \
-    get_list_opt, get_choice_opt
+from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
+    get_choice_opt, xrange
 
 # Import this carefully
 try:
@@ -25,7 +25,10 @@ except ImportError:
 try:
     import _winreg
 except ImportError:
-    _winreg = None
+    try:
+        import winreg as _winreg
+    except ImportError:
+        _winreg = None
 
 __all__ = ['ImageFormatter', 'GifImageFormatter', 'JpgImageFormatter',
            'BmpImageFormatter']
@@ -72,7 +75,10 @@ class FontManager(object):
             self._create_nix()
 
     def _get_nix_font_path(self, name, style):
-        from commands import getstatusoutput
+        try:
+            from commands import getstatusoutput
+        except ImportError:
+            from subprocess import getstatusoutput
         exit, out = getstatusoutput('fc-list "%s:style=%s" file' %
                                     (name, style))
         if not exit:
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index e9408aae..93a405e2 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -8,11 +8,13 @@
     :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
+
 from __future__ import division
 
 from pygments.formatter import Formatter
 from pygments.token import Token, STANDARD_TYPES
-from pygments.util import get_bool_opt, get_int_opt, StringIO
+from pygments.util import get_bool_opt, get_int_opt, StringIO, xrange, \
+    iteritems
 
 __all__ = ['LatexFormatter']
 
@@ -292,7 +294,7 @@ class LatexFormatter(Formatter):
         """
         cp = self.commandprefix
         styles = []
-        for name, definition in self.cmd2def.iteritems():
+        for name, definition in iteritems(self.cmd2def):
             styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' %
                           (cp, name, definition))
         return STYLE_TEMPLATE % {'cp': self.commandprefix,
@@ -307,14 +309,14 @@ class LatexFormatter(Formatter):
             realoutfile = outfile
             outfile = StringIO()
 
-        outfile.write(ur'\begin{Verbatim}[commandchars=\\\{\}')
+        outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}')
         if self.linenos:
             start, step = self.linenostart, self.linenostep
             outfile.write(u',numbers=left' +
                           (start and u',firstnumber=%d' % start or u'') +
                           (step and u',stepnumber=%d' % step or u''))
         if self.mathescape or self.texcomments:
-            outfile.write(ur',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
+            outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
         if self.verboptions:
             outfile.write(u',' + self.verboptions)
         outfile.write(u']\n')
diff --git a/pygments/formatters/other.py b/pygments/formatters/other.py
index 00fe8ba6..3376b309 100644
--- a/pygments/formatters/other.py
+++ b/pygments/formatters/other.py
@@ -10,7 +10,7 @@
 """
 
 from pygments.formatter import Formatter
-from pygments.util import OptionError, get_choice_opt, b
+from pygments.util import OptionError, get_choice_opt
 from pygments.token import Token
 from pygments.console import colorize
 
@@ -79,7 +79,7 @@ class RawTokenFormatter(Formatter):
 
     def format(self, tokensource, outfile):
         try:
-            outfile.write(b(''))
+            outfile.write(b'')
         except TypeError:
             raise TypeError('The raw tokens formatter needs a binary '
                             'output file')
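The pygments/lexer.py diff below drops the Python-2-only `__metaclass__` class attribute in favor of an `@add_metaclass` decorator (added to pygments/util.py later in this diff). Python 3 ignores `__metaclass__` and uses the `class C(metaclass=M)` keyword, which is a syntax error on Python 2; the decorator avoids both spellings by rebuilding the class through the metaclass. A small sketch of the behavior, assuming the post-commit pygments.util:

    from pygments.util import add_metaclass

    class Meta(type):
        def __new__(mcs, name, bases, d):
            d['seen_by_meta'] = True   # mark classes created via this metaclass
            return type.__new__(mcs, name, bases, d)

    @add_metaclass(Meta)
    class Demo(object):
        pass

    # The decorator re-created Demo through Meta, on Python 2 and 3 alike.
    assert type(Demo) is Meta and Demo.seen_by_meta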
diff --git a/pygments/lexer.py b/pygments/lexer.py
index ce851437..36f2f4a8 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -14,18 +14,18 @@ from pygments.filter import apply_filters, Filter
 from pygments.filters import get_filter_by_name
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
-    make_analysator
+    make_analysator, text_type, add_metaclass, iteritems
 
 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
            'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this']
 
-_encoding_map = [('\xef\xbb\xbf', 'utf-8'),
-                 ('\xff\xfe\0\0', 'utf-32'),
-                 ('\0\0\xfe\xff', 'utf-32be'),
-                 ('\xff\xfe', 'utf-16'),
-                 ('\xfe\xff', 'utf-16be')]
+_encoding_map = [(b'\xef\xbb\xbf', 'utf-8'),
+                 (b'\xff\xfe\0\0', 'utf-32'),
+                 (b'\0\0\xfe\xff', 'utf-32be'),
+                 (b'\xff\xfe', 'utf-16'),
+                 (b'\xfe\xff', 'utf-16be')]
 
 _default_analyse = staticmethod(lambda x: 0.0)
 
@@ -42,6 +42,7 @@ class LexerMeta(type):
         return type.__new__(cls, name, bases, d)
 
 
+@add_metaclass(LexerMeta)
 class Lexer(object):
     """
     Lexer for a specific language.
@@ -84,8 +85,6 @@ class Lexer(object):
     #: Priority, should multiple lexers match and no content is provided
     priority = 0
 
-    __metaclass__ = LexerMeta
-
     def __init__(self, **options):
         self.options = options
         self.stripnl = get_bool_opt(options, 'stripnl', True)
@@ -136,7 +135,7 @@ class Lexer(object):
         Also preprocess the text, i.e. expand tabs and strip it if
         wanted and applies registered filters.
         """
-        if not isinstance(text, unicode):
+        if not isinstance(text, text_type):
             if self.encoding == 'guess':
                 try:
                     text = text.decode('utf-8')
@@ -155,14 +154,13 @@ class Lexer(object):
                 decoded = None
                 for bom, encoding in _encoding_map:
                     if text.startswith(bom):
-                        decoded = unicode(text[len(bom):], encoding,
-                                          errors='replace')
+                        decoded = text[len(bom):].decode(encoding, 'replace')
                         break
                 # no BOM found, so use chardet
                 if decoded is None:
                     enc = chardet.detect(text[:1024])  # Guess using first 1KB
-                    decoded = unicode(text, enc.get('encoding') or 'utf-8',
-                                      errors='replace')
+                    decoded = text.decode(enc.get('encoding') or 'utf-8',
+                                          'replace')
                 text = decoded
             else:
                 text = text.decode(self.encoding)
@@ -476,7 +474,7 @@ class RegexLexerMeta(LexerMeta):
         """Preprocess a dictionary of token definitions."""
         processed = cls._all_tokens[name] = {}
         tokendefs = tokendefs or cls.tokens[name]
-        for state in tokendefs.keys():
+        for state in list(tokendefs):
             cls._process_state(tokendefs, processed, state)
         return processed
 
@@ -497,7 +495,7 @@ class RegexLexerMeta(LexerMeta):
         for c in itertools.chain((cls,), cls.__mro__):
             toks = c.__dict__.get('tokens', {})
 
-            for state, items in toks.iteritems():
+            for state, items in iteritems(toks):
                 curitems = tokens.get(state)
                 if curitems is None:
                     tokens[state] = items
@@ -537,13 +535,13 @@ class RegexLexerMeta(LexerMeta):
         return type.__call__(cls, *args, **kwds)
 
 
+@add_metaclass(RegexLexerMeta)
 class RegexLexer(Lexer):
     """
     Base for simple stateful regular expression-based lexers.
     Simplifies the lexing process so that you need only provide a list of
     states and regular expressions.
     """
-    __metaclass__ = RegexLexerMeta
 
     #: Flags for compiling the regular expressions.
     #: Defaults to MULTILINE.
@@ -722,7 +720,7 @@ def do_insertions(insertions, tokens):
     """
     insertions = iter(insertions)
     try:
-        index, itokens = insertions.next()
+        index, itokens = next(insertions)
     except StopIteration:
         # no insertions
         for item in tokens:
@@ -748,7 +746,7 @@ def do_insertions(insertions, tokens):
                 realpos += len(it_value)
             oldi = index - i
             try:
-                index, itokens = insertions.next()
+                index, itokens = next(insertions)
             except StopIteration:
                 insleft = False
                 break  # not strictly necessary
@@ -763,7 +761,7 @@ def do_insertions(insertions, tokens):
             yield realpos, t, v
             realpos += len(v)
         try:
-            index, itokens = insertions.next()
+            index, itokens = next(insertions)
         except StopIteration:
             insleft = False
             break  # not strictly necessary
diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py
index 6334b54f..d844ef0d 100644
--- a/pygments/lexers/__init__.py
+++ b/pygments/lexers/__init__.py
@@ -18,11 +18,11 @@ from os.path import basename
 from pygments.lexers._mapping import LEXERS
 from pygments.modeline import get_filetype_from_buffer
 from pygments.plugin import find_plugin_lexers
-from pygments.util import ClassNotFound, bytes
+from pygments.util import ClassNotFound, itervalues
 
 __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
-           'guess_lexer'] + LEXERS.keys()
+           'guess_lexer'] + list(LEXERS)
 
 _lexer_cache = {}
 _pattern_cache = {}
@@ -55,7 +55,7 @@ def get_all_lexers():
     Return a generator of tuples in the form ``(name, aliases,
     filenames, mimetypes)`` of all know lexers.
     """
-    for item in LEXERS.itervalues():
+    for item in itervalues(LEXERS):
         yield item[1:]
     for lexer in find_plugin_lexers():
         yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes
@@ -68,7 +68,7 @@ def find_lexer_class(name):
     if name in _lexer_cache:
         return _lexer_cache[name]
     # lookup builtin lexers
-    for module_name, lname, aliases, _, _ in LEXERS.itervalues():
+    for module_name, lname, aliases, _, _ in itervalues(LEXERS):
         if name == lname:
             _load_lexers(module_name)
             return _lexer_cache[name]
@@ -83,7 +83,7 @@ def get_lexer_by_name(_alias, **options):
     Get a lexer by an alias.
     """
     # lookup builtin lexers
-    for module_name, name, aliases, _, _ in LEXERS.itervalues():
+    for module_name, name, aliases, _, _ in itervalues(LEXERS):
         if _alias.lower() in aliases:
             if name not in _lexer_cache:
                 _load_lexers(module_name)
@@ -103,7 +103,7 @@ def get_lexer_for_filename(_fn, code=None, **options):
     """
     matches = []
     fn = basename(_fn)
-    for modname, name, _, filenames, _ in LEXERS.itervalues():
+    for modname, name, _, filenames, _ in itervalues(LEXERS):
         for filename in filenames:
             if _fn_matches(fn, filename):
                 if name not in _lexer_cache:
@@ -141,7 +141,7 @@ def get_lexer_for_mimetype(_mime, **options):
     """
     Get a lexer for a mimetype.
     """
-    for modname, name, _, _, mimetypes in LEXERS.itervalues():
+    for modname, name, _, _, mimetypes in itervalues(LEXERS):
         if _mime in mimetypes:
             if name not in _lexer_cache:
                 _load_lexers(modname)
diff --git a/pygments/lexers/_luabuiltins.py b/pygments/lexers/_luabuiltins.py
index fcccd3f5..40037357 100644
--- a/pygments/lexers/_luabuiltins.py
+++ b/pygments/lexers/_luabuiltins.py
@@ -15,6 +15,7 @@
 
 from __future__ import print_function
 
+
 MODULES = {'basic': ['_G',
            '_VERSION',
            'assert',
@@ -144,7 +145,10 @@ MODULES = {'basic': ['_G',
 
 if __name__ == '__main__':
     import re
-    import urllib
+    try:
+        from urllib import urlopen
+    except ImportError:
+        from urllib.request import urlopen
     import pprint
 
     # you can't generally find out what module a function belongs to if you
@@ -190,7 +194,7 @@ if __name__ == '__main__':
 
     def get_newest_version():
-        f = urllib.urlopen('http://www.lua.org/manual/')
+        f = urlopen('http://www.lua.org/manual/')
         r = re.compile(r'^<A HREF="(\d\.\d)/">Lua \1</A>')
         for line in f:
             m = r.match(line)
@@ -198,7 +202,7 @@ if __name__ == '__main__':
             return m.groups()[0]
 
     def get_lua_functions(version):
-        f = urllib.urlopen('http://www.lua.org/manual/%s/' % version)
+        f = urlopen('http://www.lua.org/manual/%s/' % version)
         r = re.compile(r'^<A HREF="manual.html#pdf-(.+)">\1</A>')
         functions = []
         for line in f:
@@ -208,7 +212,7 @@ if __name__ == '__main__':
         return functions
 
     def get_function_module(name):
-        for mod, cb in module_callbacks().iteritems():
+        for mod, cb in module_callbacks().items():
             if cb(name):
                 return mod
         if '.' in name:
diff --git a/pygments/lexers/_phpbuiltins.py b/pygments/lexers/_phpbuiltins.py
index 366a0f30..2f5ec851 100644
--- a/pygments/lexers/_phpbuiltins.py
+++ b/pygments/lexers/_phpbuiltins.py
@@ -3712,7 +3712,10 @@ if __name__ == '__main__':
     import re
     import shutil
     import tarfile
-    import urllib
+    try:
+        from urllib import urlretrieve
+    except ImportError:
+        from urllib.request import urlretrieve
 
     PHP_MANUAL_URL = 'http://us3.php.net/distributions/manual/php_manual_en.tar.gz'
     PHP_MANUAL_DIR = './php-chunked-xhtml/'
@@ -3753,7 +3756,7 @@ if __name__ == '__main__':
         return modules
 
     def get_php_references():
-        download = urllib.urlretrieve(PHP_MANUAL_URL)
+        download = urlretrieve(PHP_MANUAL_URL)
         tar = tarfile.open(download[0])
         tar.extractall()
         tar.close()
@@ -3780,7 +3783,7 @@ if __name__ == '__main__':
     def run():
         print('>> Downloading Function Index')
         modules = get_php_functions()
-        total = sum(len(v) for v in modules.itervalues())
+        total = sum(len(v) for v in modules.values())
         print('%d functions found' % total)
         regenerate(__file__, modules)
         shutil.rmtree(PHP_MANUAL_DIR)
diff --git a/pygments/lexers/_postgres_builtins.py b/pygments/lexers/_postgres_builtins.py
index 32206e9b..11dc6dec 100644
--- a/pygments/lexers/_postgres_builtins.py
+++ b/pygments/lexers/_postgres_builtins.py
@@ -10,7 +10,10 @@
 """
 
 import re
-import urllib
+try:
+    from urllib import urlopen
+except ImportError:
+    from urllib.request import urlopen
 
 # One man's constant is another man's variable.
 SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
@@ -18,11 +21,11 @@ KEYWORDS_URL = SOURCE_URL + '/doc/src/sgml/keywords.sgml'
 DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
 
 def update_myself():
-    data_file = list(fetch(DATATYPES_URL))
+    data_file = list(urlopen(DATATYPES_URL))
     datatypes = parse_datatypes(data_file)
     pseudos = parse_pseudos(data_file)
 
-    keywords = parse_keywords(fetch(KEYWORDS_URL))
+    keywords = parse_keywords(urlopen(KEYWORDS_URL))
     update_consts(__file__, 'DATATYPES', datatypes)
     update_consts(__file__, 'PSEUDO_TYPES', pseudos)
     update_consts(__file__, 'KEYWORDS', keywords)
@@ -96,9 +99,6 @@ def parse_pseudos(f):
     return dt
 
 
-def fetch(url):
-    return urllib.urlopen(url)
-
 def update_consts(filename, constname, content):
     f = open(filename)
     lines = f.readlines()
diff --git a/pygments/lexers/_robotframeworklexer.py b/pygments/lexers/_robotframeworklexer.py
index e90918da..f94ac3f2 100644
--- a/pygments/lexers/_robotframeworklexer.py
+++ b/pygments/lexers/_robotframeworklexer.py
@@ -27,6 +27,7 @@ import re
 
 from pygments.lexer import Lexer
 from pygments.token import Token
+from pygments.util import text_type
 
 
 HEADING = Token.Generic.Heading
@@ -77,7 +78,7 @@ class RobotFrameworkLexer(Lexer):
             for value, token in row_tokenizer.tokenize(row):
                 for value, token in var_tokenizer.tokenize(value, token):
                     if value:
-                        yield index, token, unicode(value)
+                        yield index, token, text_type(value)
                         index += len(value)
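The same import dance recurs in every builtin-list regeneration script (_luabuiltins and _postgres_builtins above, _sourcemodbuiltins below): urllib.urlopen moved to urllib.request.urlopen in Python 3, and the try/except picks whichever exists. Sketch:

    try:
        from urllib import urlopen          # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3

    f = urlopen('http://www.lua.org/manual/')  # URL taken from the script above

One hedged caveat: on Python 3 the response iterates as bytes, so the str regexes these scripts match against each line may still need a .decode() step; the scripts are maintainer-only tools, which is presumably why the diff leaves that as-is.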
diff --git a/pygments/lexers/_sourcemodbuiltins.py b/pygments/lexers/_sourcemodbuiltins.py
index 0b3d4bfd..eee84d0b 100644
--- a/pygments/lexers/_sourcemodbuiltins.py
+++ b/pygments/lexers/_sourcemodbuiltins.py
@@ -1014,7 +1014,10 @@ if __name__ == '__main__':
     import pprint
     import re
     import sys
-    import urllib
+    try:
+        from urllib import urlopen
+    except ImportError:
+        from urllib.request import urlopen
 
     # urllib ends up wanting to import a module called 'math' -- if
     # pygments/lexers is in the path, this ends badly.
@@ -1023,7 +1026,7 @@ if __name__ == '__main__':
             del sys.path[i]
 
     def get_version():
-        f = urllib.urlopen('http://docs.sourcemod.net/api/index.php')
+        f = urlopen('http://docs.sourcemod.net/api/index.php')
         r = re.compile(r'SourceMod v\.<b>([\d\.]+)</td>')
         for line in f:
             m = r.search(line)
@@ -1031,7 +1034,7 @@ if __name__ == '__main__':
             return m.groups()[0]
 
     def get_sm_functions():
-        f = urllib.urlopen('http://docs.sourcemod.net/api/SMfuncs.js')
+        f = urlopen('http://docs.sourcemod.net/api/SMfuncs.js')
        r = re.compile(r'SMfunctions\[\d+\] = Array \("(?:public )?([^,]+)",".+"\);')
        functions = []
        for line in f:
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py
index 88f0d983..e98a5385 100644
--- a/pygments/lexers/agile.py
+++ b/pygments/lexers/agile.py
@@ -15,7 +15,7 @@ from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
     LexerContext, include, combined, do_insertions, bygroups, using, this
 from pygments.token import Error, Text, Other, \
     Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
-from pygments.util import get_bool_opt, get_list_opt, shebang_matches
+from pygments.util import get_bool_opt, get_list_opt, shebang_matches, iteritems
 from pygments import unistring as uni
 
@@ -353,7 +353,7 @@ class PythonConsoleLexer(Lexer):
             curcode = ''
             insertions = []
             if (line.startswith(u'Traceback (most recent call last):') or
-                    re.match(ur' File "[^"]+", line \d+\n$', line)):
+                    re.match(u' File "[^"]+", line \\d+\\n$', line)):
                 tb = 1
                 curtb = line
                 tbindex = match.start()
@@ -1126,7 +1126,7 @@ class LuaLexer(RegexLexer):
         self._functions = set()
         if self.func_name_highlighting:
             from pygments.lexers._luabuiltins import MODULES
-            for mod, func in MODULES.iteritems():
+            for mod, func in iteritems(MODULES):
                 if mod not in self.disabled_modules:
                     self._functions.update(func)
         RegexLexer.__init__(self, **options)
@@ -2177,7 +2177,7 @@ class Perl6Lexer(ExtendedRegexLexer):
     # process the corresponding one!
     tokens = {
         'common' : [
-            (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)),
+            (r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)),
             (r'#[^\n]*$', Comment.Singleline),
             (r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
             (r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
@@ -2226,7 +2226,7 @@ class Perl6Lexer(ExtendedRegexLexer):
             (r'.+?', Text),
         ],
         'token-sym-brackets' : [
-            (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')),
+            (r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')),
             (r'', Name, ('#pop', 'pre-token')),
         ],
         'token': [
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 809b1db3..3ab49fa4 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -878,7 +878,7 @@ class DelphiLexer(Lexer):
         if get_bool_opt(options, 'freepascal', True):
             self.keywords.update(self.FREE_PASCAL_KEYWORDS)
         self.builtins = set()
-        for unit in get_list_opt(options, 'units', self.BUILTIN_UNITS.keys()):
+        for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
             self.builtins.update(self.BUILTIN_UNITS[unit])
 
     def get_tokens_unprocessed(self, text):
diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py
index 565bd8dd..fb4ecd88 100644
--- a/pygments/lexers/dotnet.py
+++ b/pygments/lexers/dotnet.py
@@ -14,7 +14,7 @@ from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \
     using, this
 from pygments.token import Punctuation, \
     Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other
-from pygments.util import get_choice_opt
+from pygments.util import get_choice_opt, iteritems
 from pygments import unistring as uni
 
 from pygments.lexers.web import XmlLexer
@@ -71,7 +71,7 @@ class CSharpLexer(RegexLexer):
     tokens = {}
     token_variants = True
 
-    for levelname, cs_ident in levels.items():
+    for levelname, cs_ident in iteritems(levels):
         tokens[levelname] = {
             'root': [
                 # method names
@@ -126,7 +126,7 @@ class CSharpLexer(RegexLexer):
     }
 
     def __init__(self, **options):
-        level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(), 'basic')
+        level = get_choice_opt(options, 'unicodelevel', list(self.tokens), 'basic')
         if level not in self._all_tokens:
             # compile the regexes now
             self._tokens = self.__class__.process_tokendef(level)
@@ -183,7 +183,7 @@ class NemerleLexer(RegexLexer):
     tokens = {}
     token_variants = True
 
-    for levelname, cs_ident in levels.items():
+    for levelname, cs_ident in iteritems(levels):
         tokens[levelname] = {
             'root': [
                 # method names
@@ -284,7 +284,7 @@ class NemerleLexer(RegexLexer):
     }
 
     def __init__(self, **options):
-        level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(),
+        level = get_choice_opt(options, 'unicodelevel', list(self.tokens),
                                'basic')
         if level not in self._all_tokens:
             # compile the regexes now
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index 9df88d97..690a7d4d 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -1049,12 +1049,12 @@ class AgdaLexer(RegexLexer):
             (r'{!', Comment.Directive, 'hole'),
             # Lexemes:
             #  Identifiers
-            (ur'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
+            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
             (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
             (r'\b(Set|Prop)\b', Keyword.Type),
             # Special Symbols
             (r'(\(|\)|\{|\})', Operator),
-            (ur'(\.{1,3}|\||[\u039B]|[\u2200]|[\u2192]|:|=|->)', Operator.Word),
+            (u'(\\.{1,3}|\\||[\u039B]|[\u2200]|[\u2192]|:|=|->)', Operator.Word),
             # Numbers
             (r'\d+[eE][+-]?\d+', Number.Float),
             (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
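The jvm.py diff below is the bulk of the mechanical work in this commit: `ur'...'` literals are a syntax error on Python 3 (PEP 414 restored `u'...'` in 3.3, but not the raw-unicode combination), so every `ur` string becomes a plain `u` string with its backslashes doubled by hand. A small sketch of the equivalence:

    import re

    # Python 2 allowed:  keyword_re = re.compile(ur'\b(if|then|else)\b')
    # The portable spelling doubles the backslashes in a non-raw u-string:
    keyword_re = re.compile(u'\\b(%s)\\b' % u'|'.join([u'if', u'then', u'else']))
    assert keyword_re.search(u'x if y') is not None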
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index c4029822..bd7d7c1c 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -242,25 +242,25 @@ class ScalaLexer(RegexLexer):
               u'\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b'
               u'\ua77d-\ua77e\ua780\ua782\ua784\ua786\ua78b\uff21-\uff3a]')
 
-    idrest = ur'%s(?:%s|[0-9])*(?:(?<=_)%s)?' % (letter, letter, op)
+    idrest = u'%s(?:%s|[0-9])*(?:(?<=_)%s)?' % (letter, letter, op)
 
     tokens = {
         'root': [
             # method names
             (r'(class|trait|object)(\s+)', bygroups(Keyword, Text), 'class'),
-            (ur"'%s" % idrest, Text.Symbol),
+            (u"'%s" % idrest, Text.Symbol),
             (r'[^\S\n]+', Text),
             (r'//.*?\n', Comment.Single),
             (r'/\*', Comment.Multiline, 'comment'),
-            (ur'@%s' % idrest, Name.Decorator),
-            (ur'(abstract|ca(?:se|tch)|d(?:ef|o)|e(?:lse|xtends)|'
-             ur'f(?:inal(?:ly)?|or(?:Some)?)|i(?:f|mplicit)|'
-             ur'lazy|match|new|override|pr(?:ivate|otected)'
-             ur'|re(?:quires|turn)|s(?:ealed|uper)|'
-             ur't(?:h(?:is|row)|ry)|va[lr]|w(?:hile|ith)|yield)\b|'
+            (u'@%s' % idrest, Name.Decorator),
+            (u'(abstract|ca(?:se|tch)|d(?:ef|o)|e(?:lse|xtends)|'
+             u'f(?:inal(?:ly)?|or(?:Some)?)|i(?:f|mplicit)|'
+             u'lazy|match|new|override|pr(?:ivate|otected)'
+             u'|re(?:quires|turn)|s(?:ealed|uper)|'
+             u't(?:h(?:is|row)|ry)|va[lr]|w(?:hile|ith)|yield)\\b|'
              u'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\\b|(?=\\s)|$)', Keyword),
-            (ur':(?!%s)' % op, Keyword, 'type'),
-            (ur'%s%s\b' % (upper, idrest), Name.Class),
+            (u':(?!%s)' % op, Keyword, 'type'),
+            (u'%s%s\\b' % (upper, idrest), Name.Class),
             (r'(true|false|null)\b', Keyword.Constant),
             (r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'),
             (r'(type)(\s+)', bygroups(Keyword, Text), 'type'),
@@ -281,34 +281,34 @@ class ScalaLexer(RegexLexer):
             (r'\n', Text)
         ],
         'class': [
-            (ur'(%s|%s|`[^`]+`)(\s*)(\[)' % (idrest, op),
+            (u'(%s|%s|`[^`]+`)(\\s*)(\\[)' % (idrest, op),
              bygroups(Name.Class, Text, Operator), 'typeparam'),
             (r'\s+', Text),
             (r'{', Operator, '#pop'),
             (r'\(', Operator, '#pop'),
             (r'//.*?\n', Comment.Single, '#pop'),
-            (ur'%s|%s|`[^`]+`' % (idrest, op), Name.Class, '#pop'),
+            (u'%s|%s|`[^`]+`' % (idrest, op), Name.Class, '#pop'),
         ],
         'type': [
             (r'\s+', Text),
             (u'<[%:]|>:|[#_\u21D2]|forSome|type', Keyword),
             (r'([,\);}]|=>|=)(\s*)', bygroups(Operator, Text), '#pop'),
             (r'[\(\{]', Operator, '#push'),
-            (ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)(\[)' %
+            (u'((?:%s|%s|`[^`]+`)(?:\\.(?:%s|%s|`[^`]+`))*)(\\s*)(\\[)' %
              (idrest, op, idrest, op), bygroups(Keyword.Type, Text, Operator),
              ('#pop', 'typeparam')),
-            (ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)$' %
+            (u'((?:%s|%s|`[^`]+`)(?:\\.(?:%s|%s|`[^`]+`))*)(\\s*)$' %
             (idrest, op, idrest, op), bygroups(Keyword.Type, Text), '#pop'),
             (r'//.*?\n', Comment.Single, '#pop'),
-            (ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
+            (u'\\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
         ],
         'typeparam': [
             (r'[\s,]+', Text),
             (u'<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword),
             (r'([\]\)\}])', Operator, '#pop'),
             (r'[\(\[\{]', Operator, '#push'),
-            (ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
+            (u'\\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
         ],
         'comment': [
             (r'[^/\*]+', Comment.Multiline),
@@ -317,7 +317,7 @@ class ScalaLexer(RegexLexer):
             (r'[*/]', Comment.Multiline)
         ],
         'import': [
-            (ur'(%s|\.)+' % idrest, Name.Namespace, '#pop')
+            (u'(%s|\\.)+' % idrest, Name.Namespace, '#pop')
         ],
     }
@@ -638,9 +638,9 @@ class IokeLexer(RegexLexer):
              r'System|Text|Tuple)(?![a-zA-Z0-9!:_?])', Name.Builtin),
 
             # functions
-            (ur'(generateMatchMethod|aliasMethod|\u03bb|\u028E|fnx|fn|method|'
-             ur'dmacro|dlecro|syntax|macro|dlecrox|lecrox|lecro|syntax)'
-             ur'(?![a-zA-Z0-9!:_?])', Name.Function),
+            (u'(generateMatchMethod|aliasMethod|\u03bb|\u028E|fnx|fn|method|'
+             u'dmacro|dlecro|syntax|macro|dlecrox|lecrox|lecro|syntax)'
+             u'(?![a-zA-Z0-9!:_?])', Name.Function),
 
             # Numbers
             (r'-?0[xX][0-9a-fA-F]+', Number.Hex),
@@ -650,13 +650,13 @@ class IokeLexer(RegexLexer):
             (r'#\(', Punctuation),
 
             # Operators
-            (ur'(&&>>|\|\|>>|\*\*>>|:::|::|\.\.\.|===|\*\*>|\*\*=|&&>|&&=|'
-             ur'\|\|>|\|\|=|\->>|\+>>|!>>|<>>>|<>>|&>>|%>>|#>>|@>>|/>>|\*>>|'
-             ur'\?>>|\|>>|\^>>|~>>|\$>>|=>>|<<=|>>=|<=>|<\->|=~|!~|=>|\+\+|'
-             ur'\-\-|<=|>=|==|!=|&&|\.\.|\+=|\-=|\*=|\/=|%=|&=|\^=|\|=|<\-|'
-             ur'\+>|!>|<>|&>|%>|#>|\@>|\/>|\*>|\?>|\|>|\^>|~>|\$>|<\->|\->|'
-             ur'<<|>>|\*\*|\?\||\?&|\|\||>|<|\*|\/|%|\+|\-|&|\^|\||=|\$|!|~|'
-             ur'\?|#|\u2260|\u2218|\u2208|\u2209)', Operator),
+            (r'(&&>>|\|\|>>|\*\*>>|:::|::|\.\.\.|===|\*\*>|\*\*=|&&>|&&=|'
+             r'\|\|>|\|\|=|\->>|\+>>|!>>|<>>>|<>>|&>>|%>>|#>>|@>>|/>>|\*>>|'
+             r'\?>>|\|>>|\^>>|~>>|\$>>|=>>|<<=|>>=|<=>|<\->|=~|!~|=>|\+\+|'
+             r'\-\-|<=|>=|==|!=|&&|\.\.|\+=|\-=|\*=|\/=|%=|&=|\^=|\|=|<\-|'
+             r'\+>|!>|<>|&>|%>|#>|\@>|\/>|\*>|\?>|\|>|\^>|~>|\$>|<\->|\->|'
+             r'<<|>>|\*\*|\?\||\?&|\|\||>|<|\*|\/|%|\+|\-|&|\^|\||=|\$|!|~|'
+             u'\\?|#|\u2260|\u2218|\u2208|\u2209)', Operator),
 
             (r'(and|nand|or|xor|nor|return|import)(?![a-zA-Z0-9_!?])',
              Operator),
@@ -1043,7 +1043,7 @@ class XtendLexer(RegexLexer):
              'class'),
             (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
             (r"(''')", String, 'template'),
-            (ur"(\u00BB)", String, 'template'),
+            (u'(\u00BB)', String, 'template'),
             (r'"(\\\\|\\"|[^"])*"', String),
             (r"'(\\\\|\\'|[^'])*'", String),
             (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label),
@@ -1062,7 +1062,7 @@ class XtendLexer(RegexLexer):
         ],
         'template': [
             (r"'''", String, '#pop'),
-            (ur"\u00AB", String, '#pop'),
+            (u'\u00AB', String, '#pop'),
             (r'.', String)
         ],
     }
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index ffaa40cc..8e9fad6a 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -1150,18 +1150,18 @@ class AppleScriptLexer(RegexLexer):
     tokens = {
         'root': [
             (r'\s+', Text),
-            (ur'¬\n', String.Escape),
+            (u'¬\\n', String.Escape),
             (r"'s\s+", Text),  # This is a possessive, consider moving
             (r'(--|#).*?$', Comment),
             (r'\(\*', Comment.Multiline, 'comment'),
             (r'[\(\){}!,.:]', Punctuation),
-            (ur'(«)([^»]+)(»)',
+            (u'(«)([^»]+)(»)',
              bygroups(Text, Name.Builtin, Text)),
             (r'\b((?:considering|ignoring)\s*)'
              r'(application responses|case|diacriticals|hyphens|'
              r'numeric strings|punctuation|white space)',
              bygroups(Keyword, Name.Builtin)),
-            (ur'(-|\*|\+|&|≠|>=?|<=?|=|≥|≤|/|÷|\^)', Operator),
+            (u'(-|\\*|\\+|&|≠|>=?|<=?|=|≥|≤|/|÷|\\^)', Operator),
             (r"\b(%s)\b" % '|'.join(Operators), Operator.Word),
             (r'^(\s*(?:on|end)\s+)'
              r'(%s)' % '|'.join(StudioEvents[::-1]),
@@ -1770,10 +1770,10 @@ class GherkinLexer(RegexLexer):
     filenames = ['*.feature']
     mimetypes = ['text/x-gherkin']
 
-    feature_keywords = ur'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$'
-    feature_element_keywords = ur'^(\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$'
-    examples_keywords = ur'^(\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$'
-    step_keywords = ur'^(\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假如|但是|但し|並且|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )'
+    feature_keywords = u'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$'
+    feature_element_keywords = u'^(\\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$'
+    examples_keywords = u'^(\\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$'
+    step_keywords = u'^(\\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假如|但是|但し|並且|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )'
 
     tokens = {
         'comments': [
@@ -3737,9 +3737,9 @@ class RexxLexer(RegexLexer):
                  r'while)\b', Keyword.Reserved),
         ],
         'operator': [
-            (ur'(-|//|/|\(|\)|\*\*|\*|\\<<|\\<|\\==|\\=|\\>>|\\>|\\|\|\||\||'
-             ur'&&|&|%|\+|<<=|<<|<=|<>|<|==|=|><|>=|>>=|>>|>|¬<<|¬<|¬==|¬=|'
-             ur'¬>>|¬>|¬|\.|,)', Operator),
+            (r'(-|//|/|\(|\)|\*\*|\*|\\<<|\\<|\\==|\\=|\\>>|\\>|\\|\|\||\||'
+             r'&&|&|%|\+|<<=|<<|<=|<>|<|==|=|><|>=|>>=|>>|>|¬<<|¬<|¬==|¬=|'
+             r'¬>>|¬>|¬|\.|,)', Operator),
         ],
         'string_double': [
             (r'[^"\n]+', String),
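The special.py diff below replaces the old b('...') helper and cStringIO with real b'...' literals and io.BytesIO: bytes literals are valid syntax on Python 2.6+ and 3.x alike, which is what makes the helper removable. A sketch of the RawTokenLexer-style round trip (a bytes regex plus gzip over an in-memory buffer), using only the standard library:

    import gzip
    import re
    from io import BytesIO

    line_re = re.compile(b'.*?\n')    # a bytes pattern, so it matches bytes

    raw = b'Token.Text\tu"x"\n'
    buf = BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
        f.write(raw)

    # Mirrors the call in special.py: GzipFile(filename, mode, compresslevel, fileobj)
    gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(buf.getvalue()))
    assert line_re.match(gzipfile.read()).group() == raw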
diff --git a/pygments/lexers/special.py b/pygments/lexers/special.py
index d7fe6b53..9ea2e22c 100644
--- a/pygments/lexers/special.py
+++ b/pygments/lexers/special.py
@@ -10,11 +10,10 @@
 """
 
 import re
-import cStringIO
 
 from pygments.lexer import Lexer
 from pygments.token import Token, Error, Text
-from pygments.util import get_choice_opt, b
+from pygments.util import get_choice_opt, text_type, BytesIO
 
 __all__ = ['TextLexer', 'RawTokenLexer']
 
@@ -35,7 +34,7 @@ class TextLexer(Lexer):
 _ttype_cache = {}
 
-line_re = re.compile(b('.*?\n'))
+line_re = re.compile(b'.*?\n')
 
 class RawTokenLexer(Lexer):
     """
@@ -60,12 +59,12 @@ class RawTokenLexer(Lexer):
         Lexer.__init__(self, **options)
 
     def get_tokens(self, text):
-        if isinstance(text, unicode):
+        if isinstance(text, text_type):
             # raw token stream never has any non-ASCII characters
             text = text.encode('ascii')
         if self.compress == 'gz':
             import gzip
-            gzipfile = gzip.GzipFile('', 'rb', 9, cStringIO.StringIO(text))
+            gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(text))
             text = gzipfile.read()
         elif self.compress == 'bz2':
             import bz2
@@ -73,7 +72,7 @@ class RawTokenLexer(Lexer):
         # do not call Lexer.get_tokens() because we do not want Unicode
         # decoding to occur, and stripping is not optional.
-        text = text.strip(b('\n')) + b('\n')
+        text = text.strip(b'\n') + b'\n'
         for i, t, v in self.get_tokens_unprocessed(text):
             yield t, v
 
@@ -81,7 +80,7 @@ class RawTokenLexer(Lexer):
         length = 0
         for match in line_re.finditer(text):
             try:
-                ttypestr, val = match.group().split(b('\t'), 1)
+                ttypestr, val = match.group().split(b'\t', 1)
             except ValueError:
                 val = match.group().decode(self.encoding)
                 ttype = Error
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index 94a131ff..4acc5372 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -42,8 +42,9 @@ import re
 
 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups
 from pygments.token import Punctuation, \
-     Text, Comment, Operator, Keyword, Name, String, Number, Generic
+    Text, Comment, Operator, Keyword, Name, String, Number, Generic
 from pygments.lexers import get_lexer_by_name, ClassNotFound
+from pygments.util import iteritems
 
 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
     PSEUDO_TYPES, PLPGSQL_KEYWORDS
@@ -176,7 +177,7 @@ class PlPgsqlLexer(PostgresBase, RegexLexer):
     mimetypes = ['text/x-plpgsql']
 
     flags = re.IGNORECASE
-    tokens = dict((k, l[:]) for (k, l) in PostgresLexer.tokens.iteritems())
+    tokens = dict((k, l[:]) for (k, l) in iteritems(PostgresLexer.tokens))
 
     # extend the keywords list
     for i, pattern in enumerate(tokens['root']):
@@ -210,7 +211,7 @@ class PsqlRegexLexer(PostgresBase, RegexLexer):
     aliases = []    # not public
 
     flags = re.IGNORECASE
-    tokens = dict((k, l[:]) for (k, l) in PostgresLexer.tokens.iteritems())
+    tokens = dict((k, l[:]) for (k, l) in iteritems(PostgresLexer.tokens))
 
     tokens['root'].append(
         (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
@@ -244,12 +245,13 @@ class lookahead(object):
     def send(self, i):
         self._nextitem = i
         return i
-    def next(self):
+    def __next__(self):
         if self._nextitem is not None:
             ni = self._nextitem
             self._nextitem = None
             return ni
-        return self.iter.next()
+        return next(self.iter)
+    next = __next__
 
 
 class PostgresConsoleLexer(Lexer):
@@ -277,7 +279,7 @@ class PostgresConsoleLexer(Lexer):
         insertions = []
         while 1:
             try:
-                line = lines.next()
+                line = next(lines)
             except StopIteration:
                 # allow the emission of partially collected items
                 # the repl loop will be broken below
@@ -314,7 +316,7 @@ class PostgresConsoleLexer(Lexer):
             # Emit the output lines
             out_token = Generic.Output
             while 1:
-                line = lines.next()
+                line = next(lines)
                 mprompt = re_prompt.match(line)
                 if mprompt is not None:
                     # push the line back to have it processed by the prompt
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index a387abad..740f72a0 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -13,11 +13,11 @@ import re
 import copy
 
 from pygments.lexer import RegexLexer, ExtendedRegexLexer, bygroups, using, \
-     include, this
+    include, this
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-     Number, Other, Punctuation, Literal
+    Number, Other, Punctuation, Literal
 from pygments.util import get_bool_opt, get_list_opt, looks_like_xml, \
-                          html_doctype_matches, unirange
+    html_doctype_matches, unirange, iteritems
 from pygments.lexers.agile import RubyLexer
 from pygments.lexers.compiled import ScalaLexer
 
@@ -891,7 +891,7 @@ class PhpLexer(RegexLexer):
         self._functions = set()
         if self.funcnamehighlighting:
             from pygments.lexers._phpbuiltins import MODULES
-            for key, value in MODULES.iteritems():
+            for key, value in iteritems(MODULES):
                 if key not in self.disabledmodules:
                     self._functions.update(value)
         RegexLexer.__init__(self, **options)
@@ -2355,7 +2355,7 @@ class SassLexer(ExtendedRegexLexer):
             (r"\*/", Comment, '#pop'),
         ],
     }
-    for group, common in common_sass_tokens.iteritems():
+    for group, common in iteritems(common_sass_tokens):
         tokens[group] = copy.copy(common)
     tokens['value'].append((r'\n', Text, 'root'))
     tokens['selector'].append((r'\n', Text, 'root'))
@@ -2402,7 +2402,7 @@ class ScssLexer(RegexLexer):
             (r"\*/", Comment, '#pop'),
         ],
     }
-    for group, common in common_sass_tokens.iteritems():
+    for group, common in iteritems(common_sass_tokens):
         tokens[group] = copy.copy(common)
     tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, 'root')])
     tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, 'root')])
@@ -3406,7 +3406,7 @@ class XQueryLexer(ExtendedRegexLexer):
         'xml_comment': [
             (r'(-->)', popstate_xmlcomment_callback),
             (r'[^-]{1,2}', Literal),
-            (ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
+            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
             unirange(0x10000, 0x10ffff), Literal),
         ],
         'processing_instruction': [
@@ -3416,12 +3416,12 @@ class XQueryLexer(ExtendedRegexLexer):
         ],
         'processing_instruction_content': [
             (r'\?>', String.Doc, '#pop'),
-            (ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
+            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
             unirange(0x10000, 0x10ffff), Literal),
         ],
         'cdata_section': [
             (r']]>', String.Doc, '#pop'),
-            (ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
+            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
             unirange(0x10000, 0x10ffff), Literal),
         ],
         'start_tag': [
@@ -3490,7 +3490,7 @@ class XQueryLexer(ExtendedRegexLexer):
         ],
         'pragmacontents': [
             (r'#\)', Punctuation, 'operator'),
-            (ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
+            (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
             unirange(0x10000, 0x10ffff), Literal),
             (r'(\s+)', Text),
         ],
@@ -4063,9 +4063,9 @@ class LassoLexer(RegexLexer):
         self._members = set()
         if self.builtinshighlighting:
             from pygments.lexers._lassobuiltins import BUILTINS, MEMBERS
-            for key, value in BUILTINS.iteritems():
+            for key, value in iteritems(BUILTINS):
                 self._builtins.update(value)
-            for key, value in MEMBERS.iteritems():
+            for key, value in iteritems(MEMBERS):
                 self._members.update(value)
         RegexLexer.__init__(self, **options)
diff --git a/pygments/style.py b/pygments/style.py
index d0fc26be..bb54377c 100644
--- a/pygments/style.py
+++ b/pygments/style.py
@@ -10,6 +10,7 @@
 """
 
 from pygments.token import Token, STANDARD_TYPES
+from pygments.util import add_metaclass
 
 
 class StyleMeta(type):
@@ -104,8 +105,8 @@ class StyleMeta(type):
         return len(cls._styles)
 
 
+@add_metaclass(StyleMeta)
 class Style(object):
-    __metaclass__ = StyleMeta
 
     #: overall background color (``None`` means transparent)
     background_color = '#ffffff'
diff --git a/pygments/unistring.py b/pygments/unistring.py
index da87b6df..2752037f 100644
--- a/pygments/unistring.py
+++ b/pygments/unistring.py
@@ -11,7 +11,8 @@
     :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
-from pygments.util import u_prefix
+
+from pygments.util import u_prefix, unichr
 
 Cc = u'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f'
@@ -133,7 +134,7 @@ except UnicodeDecodeError:
     Cs = ''  # Jython can't handle isolated surrogates\n\n""" % repr(val).lstrip('u'))
         else:
             f.write('%s = %r\n\n' % (cat, val))
-    f.write('cats = %r\n\n' % sorted(categories.keys()))
+    f.write('cats = %r\n\n' % sorted(categories))
 
     f.write('# Generated from unidata %s\n\n' % (unicodedata.unidata_version,))
 
     f.write(footer)
diff --git a/pygments/util.py b/pygments/util.py
index d40a88c8..c302900f 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -11,7 +11,6 @@
 
 import re
 import sys
-import codecs
 
 
 split_path_re = re.compile(r'[/\\ ]')
@@ -52,7 +51,7 @@ def get_bool_opt(options, optname, default=None):
         return string
     elif isinstance(string, int):
         return bool(string)
-    elif not isinstance(string, basestring):
+    elif not isinstance(string, string_types):
         raise OptionError('Invalid type %r for option %s; use '
                           '1/0, yes/no, true/false, on/off' % (
                           string, optname))
@@ -82,7 +81,7 @@ def get_int_opt(options, optname, default=None):
 
 def get_list_opt(options, optname, default=None):
     val = options.get(optname, default)
-    if isinstance(val, basestring):
+    if isinstance(val, string_types):
         return val.split()
     elif isinstance(val, (list, tuple)):
         return list(val)
@@ -253,25 +252,35 @@ def unirange(a, b):
 
 # Python 2/3 compatibility
 
-if sys.version_info < (3,0):
-    b = bytes = str
+if sys.version_info < (3, 0):
+    unichr = unichr
+    xrange = xrange
+    string_types = (str, unicode)
+    text_type = unicode
     u_prefix = 'u'
+    iteritems = dict.iteritems
+    itervalues = dict.itervalues
     import StringIO, cStringIO
-    BytesIO = cStringIO.StringIO
+    # unfortunately, io.StringIO in Python 2 doesn't accept str at all
     StringIO = StringIO.StringIO
-    uni_open = codecs.open
+    BytesIO = cStringIO.StringIO
 else:
-    import builtins
-    bytes = builtins.bytes
+    unichr = chr
+    xrange = range
+    string_types = (str,)
+    text_type = str
     u_prefix = ''
-    def b(s):
-        if isinstance(s, str):
-            return bytes(map(ord, s))
-        elif isinstance(s, bytes):
-            return s
-        else:
-            raise TypeError("Invalid argument %r for b()" % (s,))
-    import io
-    BytesIO = io.BytesIO
-    StringIO = io.StringIO
-    uni_open = builtins.open
+    iteritems = dict.items
+    itervalues = dict.values
+    from io import StringIO, BytesIO
+
+
+def add_metaclass(metaclass):
+    """Class decorator for creating a class with a metaclass."""
+    def wrapper(cls):
+        orig_vars = cls.__dict__.copy()
+        orig_vars.pop('__dict__', None)
+        orig_vars.pop('__weakref__', None)
+        for slots_var in orig_vars.get('__slots__', ()):
+            orig_vars.pop(slots_var)
+        return metaclass(cls.__name__, cls.__bases__, orig_vars)
+    return wrapper
diff --git a/scripts/check_sources.py b/scripts/check_sources.py
index 759fce72..71aff299 100755
--- a/scripts/check_sources.py
+++ b/scripts/check_sources.py
@@ -13,11 +13,11 @@
 
 from __future__ import print_function
 
+import io
 import os
 import re
 import sys
 import getopt
-import cStringIO
 
 from os.path import join, splitext, abspath
 
@@ -71,6 +71,9 @@ def check_style_and_encoding(fn, lines):
             encoding = co.group(1)
     try:
         line.decode(encoding)
+    except AttributeError:
+        # Python 3 - encoding was already checked
+        pass
     except UnicodeDecodeError as err:
         yield lno+1, "not decodable: %s\n   Line: %r" % (err, line)
     except LookupError as err:
@@ -134,7 +137,7 @@ def check_fileheader(fn, lines):
             yield 0, "no correct license info"
 
         ci = -3
-        copyright = [s.decode('utf-8') for s in llist[ci:ci+1]]
+        copyright = llist[ci:ci+1]
         while copyright and copyright_2_re.match(copyright[0]):
             ci -= 1
             copyright = llist[ci:ci+1]
@@ -188,14 +191,14 @@ def main(argv):
     verbose = '-v' in opts
     num = 0
-    out = cStringIO.StringIO()
+    out = io.StringIO()
 
     # TODO: replace os.walk run with iteration over output of
     #       `svn list -R`.
 
     for root, dirs, files in os.walk(path):
-        if '.svn' in dirs:
-            dirs.remove('.svn')
+        if '.hg' in dirs:
+            dirs.remove('.hg')
         if '-i' in opts and abspath(root) in opts['-i']:
             del dirs[:]
             continue
@@ -230,7 +233,7 @@ def main(argv):
         if not in_pocoo_pkg and checker.only_pkg:
             continue
         for lno, msg in checker(fn, lines):
-            print("%s:%d: %s" % (fn, lno, msg), file=out)
+            print(u"%s:%d: %s" % (fn, lno, msg), file=out)
             num += 1
     if verbose:
         print()
diff --git a/scripts/detect_missing_analyse_text.py b/scripts/detect_missing_analyse_text.py
index ac1da06c..ab58558e 100644
--- a/scripts/detect_missing_analyse_text.py
+++ b/scripts/detect_missing_analyse_text.py
@@ -17,7 +17,7 @@ def main():
             uses[f].append(cls)
 
     ret = 0
-    for k, v in uses.iteritems():
+    for k, v in uses.items():
         if len(v) > 1:
             #print "Multiple for", k, v
             for i in v:
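pygments/util.py above is the heart of the port: a small six-style compatibility block (text_type, string_types, iteritems, itervalues, unichr, xrange, StringIO/BytesIO) that the rest of the codebase imports instead of touching Python-2 builtins directly. Note that iteritems/itervalues are stored as unbound dict methods, so call sites pass the dict as an argument. A sketch of typical usage, assuming the post-commit pygments.util:

    from pygments.util import iteritems, string_types, text_type

    def describe(mapping, label):
        assert isinstance(label, string_types)   # str/unicode on 2, str on 3
        # dict.iteritems on Python 2, dict.items on Python 3
        return [u'%s=%s' % (k, text_type(v)) for k, v in iteritems(mapping)]

    print(describe({'tabs': 4}, u'options'))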
% ( - num, gnum, sum(len(fitem) for fitem in store.itervalues()), len(store))) + num, gnum, sum(len(fitem) for fitem in store.values()), len(store))) if not store: return 0 @@ -194,7 +198,7 @@ td { padding: 2px 5px 2px 5px; '<td class="tag %%(tag)s">%%(tag)s</td>' '<td class="who">%%(who)s</td><td class="what">%%(what)s</td></tr>') - f = file(output, 'w') + f = open(output, 'w') table = '\n'.join(TABLE % fname + '\n'.join(TR % (no % 2,) % entry for no, entry in enumerate(store[fname])) diff --git a/scripts/find_error.py b/scripts/find_error.py index 7a513701..7aaa9bee 100755 --- a/scripts/find_error.py +++ b/scripts/find_error.py @@ -107,7 +107,7 @@ def main(fn, lexer=None, options={}): # already debugged before debug_lexer = True lno = 1 - text = file(fn, 'U').read() + text = open(fn, 'U').read() text = text.strip('\n') + '\n' tokens = [] states = [] diff --git a/scripts/vim2pygments.py b/scripts/vim2pygments.py index 348fb852..42af0bbe 100755 --- a/scripts/vim2pygments.py +++ b/scripts/vim2pygments.py @@ -16,7 +16,7 @@ from __future__ import print_function import sys import re from os import path -from cStringIO import StringIO +from io import StringIO split_re = re.compile(r'(?<!\\)\s+') @@ -767,7 +767,7 @@ TOKENS = { } TOKEN_TYPES = set() -for token in TOKENS.itervalues(): +for token in TOKENS.values(): if not isinstance(token, tuple): token = (token,) for token in token: @@ -838,7 +838,7 @@ def find_colors(code): colors['Normal']['bgcolor'] = bg_color color_map = {} - for token, styles in colors.iteritems(): + for token, styles in colors.items(): if token in TOKENS: tmp = [] if styles.get('noinherit'): @@ -881,7 +881,7 @@ class StyleWriter(object): def write(self, out): self.write_header(out) default_token, tokens = find_colors(self.code) - tokens = tokens.items() + tokens = list(tokens.items()) tokens.sort(lambda a, b: cmp(len(a[0]), len(a[1]))) bg_color = [x[3:] for x in default_token.split() if x.startswith('bg:')] if bg_color: @@ -48,11 +48,6 @@ except ImportError: ] have_setuptools = False -try: - from distutils.command.build_py import build_py_2to3 as build_py -except ImportError: - from distutils.command.build_py import build_py - if have_setuptools: add_keywords = dict( entry_points = { @@ -91,6 +86,5 @@ setup( 'Topic :: Text Processing :: Filters', 'Topic :: Utilities', ], - cmdclass = {'build_py': build_py}, **add_keywords ) diff --git a/tests/run.py b/tests/run.py index 394a8f1b..e87837e5 100644 --- a/tests/run.py +++ b/tests/run.py @@ -16,40 +16,29 @@ from __future__ import print_function import sys, os -if sys.version_info >= (3,): - # copy test suite over to "build/lib" and convert it - print ('Copying and converting sources to build/lib/test...') - from distutils.util import copydir_run_2to3 - testroot = os.path.dirname(__file__) - newroot = os.path.join(testroot, '..', 'build/lib/test') - copydir_run_2to3(testroot, newroot) - # make nose believe that we run from the converted dir - os.chdir(newroot) -else: - # only find tests in this directory - if os.path.dirname(__file__): - os.chdir(os.path.dirname(__file__)) +# only find tests in this directory +if os.path.dirname(__file__): + os.chdir(os.path.dirname(__file__)) try: import nose except ImportError: - print ('nose is required to run the Pygments test suite') + print('nose is required to run the Pygments test suite') sys.exit(1) try: # make sure the current source is first on sys.path sys.path.insert(0, '..') import pygments -except SyntaxError: - print(('Syntax error: %s' % sys.exc_info()[1])) - print ('Please run 
setup.py build before make test on Python 3') +except SyntaxError as err: + print('Syntax error: %s' % err) sys.exit(1) -except ImportError: - print(('Cannot find Pygments to test: %s' % sys.exc_info()[1])) +except ImportError as err: + print('Cannot find Pygments to test: %s' % err) sys.exit(1) else: - print(('Pygments %s test suite running (Python %s)...' % - (pygments.__version__, sys.version.split()[0]))) + print('Pygments %s test suite running (Python %s)...' % + (pygments.__version__, sys.version.split()[0])) nose.main() diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py index 5a7bb42f..78227d2f 100644 --- a/tests/test_basic_api.py +++ b/tests/test_basic_api.py @@ -16,7 +16,7 @@ from pygments import lexers, formatters, filters, format from pygments.token import _TokenType, Text from pygments.lexer import RegexLexer from pygments.formatters.img import FontNotFound -from pygments.util import StringIO, bytes +from pygments.util import text_type, StringIO, xrange import support @@ -29,7 +29,7 @@ test_content = ''.join(test_content) + '\n' def test_lexer_import_all(): # instantiate every lexer, to see if the token type defs are correct - for x in lexers.LEXERS.keys(): + for x in lexers.LEXERS: c = getattr(lexers, x)() @@ -73,7 +73,7 @@ def test_lexer_classes(): assert isinstance(token[0], _TokenType) if isinstance(token[1], str): print(repr(token[1])) - assert isinstance(token[1], unicode) + assert isinstance(token[1], text_type) txt += token[1] assert txt == test_content, "%s lexer roundtrip failed: %r != %r" % \ (cls.name, test_content, txt) @@ -128,7 +128,7 @@ def test_get_lexers(): ]: yield verify, func, args - for cls, (_, lname, aliases, _, mimetypes) in lexers.LEXERS.iteritems(): + for cls, (_, lname, aliases, _, mimetypes) in lexers.LEXERS.items(): assert cls == lexers.find_lexer_class(lname).__name__ for alias in aliases: @@ -163,7 +163,7 @@ def test_formatter_public_api(): pass inst.format(ts, out) - for formatter, info in formatters.FORMATTERS.iteritems(): + for formatter, info in formatters.FORMATTERS.items(): yield verify, formatter, info def test_formatter_encodings(): @@ -173,7 +173,7 @@ def test_formatter_encodings(): fmt = HtmlFormatter() tokens = [(Text, u"ä")] out = format(tokens, fmt) - assert type(out) is unicode + assert type(out) is text_type assert u"ä" in out # encoding option @@ -202,7 +202,7 @@ def test_formatter_unicode_handling(): if formatter.name != 'Raw tokens': out = format(tokens, inst) if formatter.unicodeoutput: - assert type(out) is unicode + assert type(out) is text_type inst = formatter(encoding='utf-8') out = format(tokens, inst) @@ -214,7 +214,7 @@ def test_formatter_unicode_handling(): out = format(tokens, inst) assert type(out) is bytes, '%s: %r' % (formatter, out) - for formatter, info in formatters.FORMATTERS.iteritems(): + for formatter, info in formatters.FORMATTERS.items(): yield verify, formatter @@ -242,7 +242,7 @@ class FiltersTest(unittest.TestCase): 'whitespace': {'spaces': True, 'tabs': True, 'newlines': True}, 'highlight': {'names': ['isinstance', 'lexers', 'x']}, } - for x in filters.FILTERS.keys(): + for x in filters.FILTERS: lx = lexers.PythonLexer() lx.add_filter(x, **filter_args.get(x, {})) fp = open(TESTFILE, 'rb') diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index cbb05db7..ef14661c 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -9,11 +9,12 @@ # Test the command line interface -import sys, os +import io +import sys import unittest -import StringIO from pygments import highlight +from 
pygments.util import StringIO from pygments.cmdline import main as cmdline_main import support @@ -24,8 +25,8 @@ TESTFILE, TESTDIR = support.location(__file__) def run_cmdline(*args): saved_stdout = sys.stdout saved_stderr = sys.stderr - new_stdout = sys.stdout = StringIO.StringIO() - new_stderr = sys.stderr = StringIO.StringIO() + new_stdout = sys.stdout = StringIO() + new_stderr = sys.stderr = StringIO() try: ret = cmdline_main(["pygmentize"] + list(args)) finally: diff --git a/tests/test_examplefiles.py b/tests/test_examplefiles.py index ca68e7d4..0547ffd3 100644 --- a/tests/test_examplefiles.py +++ b/tests/test_examplefiles.py @@ -12,11 +12,11 @@ from __future__ import print_function import os import pprint import difflib -import cPickle as pickle +import pickle from pygments.lexers import get_lexer_for_filename, get_lexer_by_name from pygments.token import Error -from pygments.util import ClassNotFound, b +from pygments.util import ClassNotFound STORE_OUTPUT = False @@ -65,8 +65,8 @@ def check_lexer(lx, absfn, outfn): text = fp.read() finally: fp.close() - text = text.replace(b('\r\n'), b('\n')) - text = text.strip(b('\n')) + b('\n') + text = text.replace(b'\r\n', b'\n') + text = text.strip(b'\n') + b'\n' try: text = text.decode('utf-8') if text.startswith(u'\ufeff'): diff --git a/tests/test_html_formatter.py b/tests/test_html_formatter.py index a0489602..91225cd3 100644 --- a/tests/test_html_formatter.py +++ b/tests/test_html_formatter.py @@ -9,23 +9,23 @@ from __future__ import print_function +import io import os import re import unittest -import StringIO import tempfile from os.path import join, dirname, isfile +from pygments.util import StringIO from pygments.lexers import PythonLexer from pygments.formatters import HtmlFormatter, NullFormatter from pygments.formatters.html import escape_html -from pygments.util import uni_open import support TESTFILE, TESTDIR = support.location(__file__) -fp = uni_open(TESTFILE, encoding='utf-8') +fp = io.open(TESTFILE, encoding='utf-8') try: tokensource = list(PythonLexer().get_tokens(fp.read())) finally: @@ -35,11 +35,11 @@ finally: class HtmlFormatterTest(unittest.TestCase): def test_correct_output(self): hfmt = HtmlFormatter(nowrap=True) - houtfile = StringIO.StringIO() + houtfile = StringIO() hfmt.format(tokensource, houtfile) nfmt = NullFormatter() - noutfile = StringIO.StringIO() + noutfile = StringIO() nfmt.format(tokensource, noutfile) stripped_html = re.sub('<.*?>', '', houtfile.getvalue()) @@ -76,13 +76,13 @@ class HtmlFormatterTest(unittest.TestCase): dict(linenos=True, full=True), dict(linenos=True, full=True, noclasses=True)]: - outfile = StringIO.StringIO() + outfile = StringIO() fmt = HtmlFormatter(**optdict) fmt.format(tokensource, outfile) def test_linenos(self): optdict = dict(linenos=True) - outfile = StringIO.StringIO() + outfile = StringIO() fmt = HtmlFormatter(**optdict) fmt.format(tokensource, outfile) html = outfile.getvalue() @@ -90,7 +90,7 @@ class HtmlFormatterTest(unittest.TestCase): def test_linenos_with_startnum(self): optdict = dict(linenos=True, linenostart=5) - outfile = StringIO.StringIO() + outfile = StringIO() fmt = HtmlFormatter(**optdict) fmt.format(tokensource, outfile) html = outfile.getvalue() @@ -98,7 +98,7 @@ class HtmlFormatterTest(unittest.TestCase): def test_lineanchors(self): optdict = dict(lineanchors="foo") - outfile = StringIO.StringIO() + outfile = StringIO() fmt = HtmlFormatter(**optdict) fmt.format(tokensource, outfile) html = outfile.getvalue() @@ -106,7 +106,7 @@ class 
HtmlFormatterTest(unittest.TestCase): def test_lineanchors_with_startnum(self): optdict = dict(lineanchors="foo", linenostart=5) - outfile = StringIO.StringIO() + outfile = StringIO() fmt = HtmlFormatter(**optdict) fmt.format(tokensource, outfile) html = outfile.getvalue() @@ -174,7 +174,7 @@ class HtmlFormatterTest(unittest.TestCase): # anymore in the actual source fmt = HtmlFormatter(tagsfile='support/tags', lineanchors='L', tagurlformat='%(fname)s%(fext)s') - outfile = StringIO.StringIO() + outfile = StringIO() fmt.format(tokensource, outfile) self.assertTrue('<a href="test_html_formatter.py#L-165">test_ctags</a>' in outfile.getvalue()) diff --git a/tests/test_token.py b/tests/test_token.py index 26cc772e..c5cc4990 100644 --- a/tests/test_token.py +++ b/tests/test_token.py @@ -36,11 +36,11 @@ class TokenTest(unittest.TestCase): stp = token.STANDARD_TYPES.copy() stp[token.Token] = '---' # Token and Text do conflict, that is okay t = {} - for k, v in stp.iteritems(): + for k, v in stp.items(): t.setdefault(v, []).append(k) if len(t) == len(stp): return # Okay - for k, v in t.iteritems(): + for k, v in t.items(): if len(v) > 1: self.fail("%r has more than one key: %r" % (k, v)) |
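
For context on the pygments/util.py hunk above: the removed b() and uni_open()
helpers are superseded by b'' literals, io.open(), and the new single-source
names string_types, text_type, unichr, xrange, iteritems and itervalues. A
minimal sketch of the intended usage pattern follows; describe_options() and
its sample data are hypothetical illustrations, not code from this changeset.

    # Sketch only -- assumes the post-patch pygments.util; describe_options()
    # is a made-up helper, not part of the changeset.
    from pygments.util import string_types, text_type, iteritems

    def describe_options(options):
        # iteritems(d) resolves to dict.iteritems(d) on Python 2 and to
        # dict.items(d) on Python 3, so this loop is single-source.
        parts = []
        for name, value in sorted(iteritems(options)):
            if isinstance(value, string_types):  # (str, unicode) on 2.x, (str,) on 3.x
                parts.append(u'%s=%s' % (name, value))
            else:
                parts.append(u'%s=%r' % (name, value))
        result = u', '.join(parts)
        assert isinstance(result, text_type)  # unicode on 2.x, str on 3.x
        return result

    print(describe_options({'linenos': True, 'encoding': u'utf-8'}))

Binding iteritems = dict.iteritems (or dict.items) as an unbound method is
what lets iteritems(d) work on both interpreters without a wrapper function.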
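The add_metaclass() decorator is the one genuinely new public helper in this
patch; it follows the same approach as six.add_metaclass and replaces the
2.x-only __metaclass__ attribute and the 3.x-only class C(metaclass=M) syntax,
neither of which can appear in single-source code. A short sketch of how a
caller might use it, under the assumption that it is imported from the
post-patch pygments.util; RegistryMeta and Plugin are invented names.

    # Sketch only: RegistryMeta/Plugin are hypothetical, but add_metaclass
    # is the helper added to pygments/util.py above.
    from pygments.util import add_metaclass

    class RegistryMeta(type):
        """Metaclass that records every class it creates."""
        registry = []

        def __new__(mcs, name, bases, namespace):
            cls = super(RegistryMeta, mcs).__new__(mcs, name, bases, namespace)
            mcs.registry.append(cls)
            return cls

    # The decorator re-creates Plugin through RegistryMeta, after stripping
    # __dict__/__weakref__ (and any __slots__ descriptors) from the class
    # body -- exactly what the wrapper in the diff does.
    @add_metaclass(RegistryMeta)
    class Plugin(object):
        pass

    assert type(Plugin) is RegistryMeta
    assert Plugin in RegistryMeta.registry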