diff options
author | Jean Abou-Samra <jean@abou-samra.fr> | 2022-01-31 22:00:40 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-31 22:00:40 +0100 |
commit | c81e97c3c281492ac6fdf4abc841ac5b63f3f303 (patch) | |
tree | 8d52ec56246f4d08a4eb0d16317cc5c261ebfd05 | |
parent | b1edee0c8f736276a86e30d8edaed1f581e009d0 (diff) | |
download | pygments-git-c81e97c3c281492ac6fdf4abc841ac5b63f3f303.tar.gz |
Remove now redundant re.UNICODE and (?u) (#2058)
30 files changed, 40 insertions, 74 deletions
diff --git a/pygments/lexers/bare.py b/pygments/lexers/bare.py index 880c0b30..4ca07010 100644 --- a/pygments/lexers/bare.py +++ b/pygments/lexers/bare.py @@ -26,8 +26,6 @@ class BareLexer(RegexLexer): filenames = ['*.bare'] aliases = ['bare'] - flags = re.MULTILINE | re.UNICODE - keywords = [ 'type', 'enum', diff --git a/pygments/lexers/capnproto.py b/pygments/lexers/capnproto.py index f15297fa..73300eef 100644 --- a/pygments/lexers/capnproto.py +++ b/pygments/lexers/capnproto.py @@ -26,8 +26,6 @@ class CapnProtoLexer(RegexLexer): filenames = ['*.capnp'] aliases = ['capnp'] - flags = re.MULTILINE | re.UNICODE - tokens = { 'root': [ (r'#.*?$', Comment.Single), diff --git a/pygments/lexers/cddl.py b/pygments/lexers/cddl.py index 41b88b88..3dbaa368 100644 --- a/pygments/lexers/cddl.py +++ b/pygments/lexers/cddl.py @@ -114,8 +114,6 @@ class CddlLexer(RegexLexer): _re_uint = r"(?:0b[01]+|0x[0-9a-fA-F]+|[1-9]\d*|0(?!\d))" _re_int = r"-?" + _re_uint - flags = re.UNICODE | re.MULTILINE - tokens = { "commentsandwhitespace": [(r"\s+", Whitespace), (r";.+$", Comment.Single)], "root": [ diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index 7619f8ad..5ffd007c 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -51,7 +51,7 @@ class CSharpLexer(RegexLexer): filenames = ['*.cs'] mimetypes = ['text/x-csharp'] # inferred - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL # for the range of allowed unicode characters in identifiers, see # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf @@ -77,7 +77,7 @@ class CSharpLexer(RegexLexer): (r'^([ \t]*)((?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type r'(' + cs_ident + ')' # method name r'(\s*)(\()', # signature start - bygroups(Whitespace, using(this), Name.Function, Whitespace, + bygroups(Whitespace, using(this), Name.Function, Whitespace, Punctuation)), (r'^(\s*)(\[.*?\])', bygroups(Whitespace, Name.Attribute)), (r'[^\S\n]+', Whitespace), @@ -94,7 +94,7 @@ class CSharpLexer(RegexLexer): r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number), (r'(#)([ \t]*)(if|endif|else|elif|define|undef|' r'line|error|warning|region|endregion|pragma)\b(.*?)(\n)', - bygroups(Comment.Preproc, Whitespace, Comment.Preproc, + bygroups(Comment.Preproc, Whitespace, Comment.Preproc, Comment.Preproc, Whitespace)), (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Whitespace, Keyword)), @@ -166,7 +166,7 @@ class NemerleLexer(RegexLexer): filenames = ['*.n'] mimetypes = ['text/x-nemerle'] # inferred - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL # for the range of allowed unicode characters in identifiers, see # http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf @@ -206,7 +206,7 @@ class NemerleLexer(RegexLexer): 'splice-string2'), (r'<#', String, 'recursive-string'), - (r'(<\[)(\s*)(' + cs_ident + ':)?', bygroups(Keyword, + (r'(<\[)(\s*)(' + cs_ident + ':)?', bygroups(Keyword, Whitespace, Keyword)), (r'\]\>', Keyword), @@ -645,7 +645,7 @@ class FSharpLexer(RegexLexer): (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), (r'(#)([ \t]*)(if|endif|else|line|nowarn|light|\d+)\b(.*?)(\n)', - bygroups(Comment.Preproc, Whitespace, Comment.Preproc, + bygroups(Comment.Preproc, Whitespace, Comment.Preproc, Comment.Preproc, Whitespace)), (r"[^\W\d][\w']*", Name), diff --git a/pygments/lexers/ezhil.py b/pygments/lexers/ezhil.py index 4b679151..c58b91ce 100644 --- a/pygments/lexers/ezhil.py +++ b/pygments/lexers/ezhil.py @@ -26,7 +26,6 @@ class EzhilLexer(RegexLexer): aliases = ['ezhil'] filenames = ['*.n'] mimetypes = ['text/x-ezhil'] - flags = re.MULTILINE | re.UNICODE # Refer to tamil.utf8.tamil_letters from open-tamil for a stricter version of this. # This much simpler version is close enough, and includes combining marks. _TALETTERS = '[a-zA-Z_]|[\u0b80-\u0bff]' @@ -58,8 +57,8 @@ class EzhilLexer(RegexLexer): ], 'literal': [ (r'".*?"', String), - (r'(?u)\d+((\.\d*)?[eE][+-]?\d+|\.\d*)', Number.Float), - (r'(?u)\d+', Number.Integer), + (r'\d+((\.\d*)?[eE][+-]?\d+|\.\d*)', Number.Float), + (r'\d+', Number.Integer), ] } diff --git a/pygments/lexers/factor.py b/pygments/lexers/factor.py index 7b59197c..e47c0eb7 100644 --- a/pygments/lexers/factor.py +++ b/pygments/lexers/factor.py @@ -28,8 +28,6 @@ class FactorLexer(RegexLexer): filenames = ['*.factor'] mimetypes = ['text/x-factor'] - flags = re.MULTILINE | re.UNICODE - builtin_kernel = words(( '-rot', '2bi', '2bi@', '2bi*', '2curry', '2dip', '2drop', '2dup', '2keep', '2nip', '2over', '2tri', '2tri@', '2tri*', '3bi', '3curry', '3dip', '3drop', '3dup', '3keep', diff --git a/pygments/lexers/futhark.py b/pygments/lexers/futhark.py index 5c120137..7f481ba8 100644 --- a/pygments/lexers/futhark.py +++ b/pygments/lexers/futhark.py @@ -32,8 +32,6 @@ class FutharkLexer(RegexLexer): filenames = ['*.fut'] mimetypes = ['text/x-futhark'] - flags = re.MULTILINE | re.UNICODE - num_types = ('i8', 'i16', 'i32', 'i64', 'u8', 'u16', 'u32', 'u64', 'f32', 'f64') other_types = ('bool', ) diff --git a/pygments/lexers/go.py b/pygments/lexers/go.py index 5af0b197..38936093 100644 --- a/pygments/lexers/go.py +++ b/pygments/lexers/go.py @@ -28,8 +28,6 @@ class GoLexer(RegexLexer): aliases = ['go', 'golang'] mimetypes = ['text/x-gosrc'] - flags = re.MULTILINE | re.UNICODE - tokens = { 'root': [ (r'\n', Whitespace), diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py index e0c7d491..a2f1bb3e 100644 --- a/pygments/lexers/grammar_notation.py +++ b/pygments/lexers/grammar_notation.py @@ -144,8 +144,6 @@ class JsgfLexer(RegexLexer): filenames = ['*.jsgf'] mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf'] - flags = re.MULTILINE | re.UNICODE - tokens = { 'root': [ include('comments'), diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py index d50ccb42..4bc167f5 100644 --- a/pygments/lexers/haskell.py +++ b/pygments/lexers/haskell.py @@ -35,8 +35,6 @@ class HaskellLexer(RegexLexer): filenames = ['*.hs'] mimetypes = ['text/x-haskell'] - flags = re.MULTILINE | re.UNICODE - reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else', 'family', 'if', 'in', 'infix[lr]?', 'instance', 'let', 'newtype', 'of', 'then', 'type', 'where', '_') diff --git a/pygments/lexers/html.py b/pygments/lexers/html.py index 86ef73ae..ed28731a 100644 --- a/pygments/lexers/html.py +++ b/pygments/lexers/html.py @@ -195,7 +195,7 @@ class XmlLexer(RegexLexer): Generic lexer for XML (eXtensible Markup Language). """ - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL name = 'XML' aliases = ['xml'] @@ -208,7 +208,7 @@ class XmlLexer(RegexLexer): 'root': [ ('[^<&]+', Text), (r'&\S*?;', Name.Entity), - (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), + (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), (r'<!--(.|\n)*?-->', Comment.Multiline), (r'<\?.*?\?>', Comment.Preproc), ('<![^>]*>', Comment.Preproc), diff --git a/pygments/lexers/int_fiction.py b/pygments/lexers/int_fiction.py index 89f2109b..16fd1217 100644 --- a/pygments/lexers/int_fiction.py +++ b/pygments/lexers/int_fiction.py @@ -30,7 +30,7 @@ class Inform6Lexer(RegexLexer): aliases = ['inform6', 'i6'] filenames = ['*.inf'] - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL _name = r'[a-zA-Z_]\w*' @@ -537,7 +537,7 @@ class Inform7Lexer(RegexLexer): aliases = ['inform7', 'i7'] filenames = ['*.ni', '*.i7x'] - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL _dash = Inform6Lexer._dash _dquote = Inform6Lexer._dquote diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index e8012819..1dc0c57d 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -42,7 +42,7 @@ class JavascriptLexer(RegexLexer): mimetypes = ['application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript'] - flags = re.DOTALL | re.UNICODE | re.MULTILINE + flags = re.DOTALL | re.MULTILINE tokens = { 'commentsandwhitespace': [ @@ -1439,7 +1439,7 @@ class JuttleLexer(RegexLexer): mimetypes = ['application/juttle', 'application/x-juttle', 'text/x-juttle', 'text/juttle'] - flags = re.DOTALL | re.UNICODE | re.MULTILINE + flags = re.DOTALL | re.MULTILINE tokens = { 'commentsandwhitespace': [ @@ -1525,7 +1525,7 @@ class NodeConsoleLexer(Lexer): for match in line_re.finditer(text): line = match.group() if line.startswith('> '): - insertions.append((len(curcode), + insertions.append((len(curcode), [(0, Generic.Prompt, line[:2])])) curcode += line[2:] @@ -1534,21 +1534,21 @@ class NodeConsoleLexer(Lexer): code = line.lstrip('.') lead = len(line) - len(code) - insertions.append((len(curcode), + insertions.append((len(curcode), [(0, Generic.Prompt, line[:lead])])) curcode += code else: if curcode: - yield from do_insertions(insertions, + yield from do_insertions(insertions, jslexer.get_tokens_unprocessed(curcode)) curcode = '' insertions = [] - yield from do_insertions([], + yield from do_insertions([], jslexer.get_tokens_unprocessed(line)) if curcode: - yield from do_insertions(insertions, + yield from do_insertions(insertions, jslexer.get_tokens_unprocessed(curcode)) diff --git a/pygments/lexers/julia.py b/pygments/lexers/julia.py index f8e939f4..d0a7f536 100644 --- a/pygments/lexers/julia.py +++ b/pygments/lexers/julia.py @@ -38,8 +38,6 @@ class JuliaLexer(RegexLexer): filenames = ['*.jl'] mimetypes = ['text/x-julia', 'application/x-julia'] - flags = re.MULTILINE | re.UNICODE - tokens = { 'root': [ (r'\n', Text), diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index f20ecf94..721aee3a 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -33,7 +33,7 @@ class JavaLexer(RegexLexer): filenames = ['*.java'] mimetypes = ['text/x-java'] - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL tokens = { 'root': [ @@ -272,7 +272,7 @@ class ScalaLexer(RegexLexer): (r'\b(package)(\s+)', bygroups(Keyword, Text), 'package'), (r'\b(given)\b(\s*)(%s)' % idUpper, bygroups(Keyword, Text, Name.Class)), - (r'\b(given)\b(\s*)(%s)?' % anyId, + (r'\b(given)\b(\s*)(%s)?' % anyId, bygroups(Keyword, Text, Name)), ], 'inheritance': [ @@ -296,7 +296,7 @@ class ScalaLexer(RegexLexer): ], 'punctuation': [ (r'[{}()\[\];,.]', Punctuation), - (r'(?<!:):(?!:)', Punctuation), + (r'(?<!:):(?!:)', Punctuation), ], 'keywords': [ (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword), @@ -334,7 +334,7 @@ class ScalaLexer(RegexLexer): (r'(\.)(type)\b', bygroups(Punctuation, Keyword)), ], 'inline': [ - # inline is a soft modifier, only highlighted if followed by if, + # inline is a soft modifier, only highlighted if followed by if, # match or parameters. (r'\b(inline)(?=\s+(%s|%s)\s*:)' % (plainid, backQuotedId), Keyword), @@ -1091,7 +1091,7 @@ class KotlinLexer(RegexLexer): filenames = ['*.kt', '*.kts'] mimetypes = ['text/x-kotlin'] - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL kt_name = ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', @@ -1214,7 +1214,7 @@ class KotlinLexer(RegexLexer): 'string_common': [ (r'\\\\', String), # escaped backslash (r'\\"', String), # escaped quote - (r'\\', String), # bare backslash + (r'\\', String), # bare backslash (r'\$\{', String.Interpol, 'interpolation'), (r'(\$)(\w+)', bygroups(String.Interpol, Name)), (r'[^\\"$]+', String) diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py index 798907df..d523b36d 100644 --- a/pygments/lexers/lisp.py +++ b/pygments/lexers/lisp.py @@ -1299,7 +1299,7 @@ class RacketLexer(RegexLexer): (r'#\|', Comment.Multiline, 'block-comment'), # Whitespaces - (r'(?u)\s+', Text), + (r'\s+', Text), # Numbers: Keep in mind Racket reader hash prefixes, which # can denote the base or the type. These don't map neatly @@ -1348,7 +1348,7 @@ class RacketLexer(RegexLexer): (r'#(true|false|[tTfF])', Name.Constant, '#pop'), # Keyword argument names (e.g. #:keyword) - (r'(?u)#:%s' % _symbol, Keyword.Declaration, '#pop'), + (r'#:%s' % _symbol, Keyword.Declaration, '#pop'), # Reader extensions (r'(#lang |#!)(\S+)', @@ -1377,9 +1377,9 @@ class RacketLexer(RegexLexer): (r'quasiquote(?=[%s])' % _delimiters, Keyword, ('#pop', 'quasiquoted-datum')), (_opening_parenthesis, Punctuation, ('#pop', 'unquoted-list')), - (words(_keywords, prefix='(?u)', suffix='(?=[%s])' % _delimiters), + (words(_keywords, suffix='(?=[%s])' % _delimiters), Keyword, '#pop'), - (words(_builtins, prefix='(?u)', suffix='(?=[%s])' % _delimiters), + (words(_builtins, suffix='(?=[%s])' % _delimiters), Name.Builtin, '#pop'), (_symbol, Name, '#pop'), include('datum*') @@ -1435,7 +1435,7 @@ class NewLispLexer(RegexLexer): filenames = ['*.lsp', '*.nl', '*.kif'] mimetypes = ['text/x-newlisp', 'application/x-newlisp'] - flags = re.IGNORECASE | re.MULTILINE | re.UNICODE + flags = re.IGNORECASE | re.MULTILINE # list of built-in functions for newLISP version 10.3 builtins = ( diff --git a/pygments/lexers/meson.py b/pygments/lexers/meson.py index 233c22f6..a4a343c5 100644 --- a/pygments/lexers/meson.py +++ b/pygments/lexers/meson.py @@ -48,8 +48,6 @@ class MesonLexer(RegexLexer): filenames = ['meson.build', 'meson_options.txt'] mimetypes = ['text/x-meson'] - flags = re.MULTILINE | re.UNICODE - tokens = { 'root': [ (r'#.*?$', Comment), diff --git a/pygments/lexers/nimrod.py b/pygments/lexers/nimrod.py index 74f73f1e..a0a45c73 100644 --- a/pygments/lexers/nimrod.py +++ b/pygments/lexers/nimrod.py @@ -29,7 +29,7 @@ class NimrodLexer(RegexLexer): filenames = ['*.nim', '*.nimrod'] mimetypes = ['text/x-nim'] - flags = re.MULTILINE | re.IGNORECASE | re.UNICODE + flags = re.MULTILINE | re.IGNORECASE def underscorize(words): newWords = [] diff --git a/pygments/lexers/nix.py b/pygments/lexers/nix.py index 412280f7..6414cf82 100644 --- a/pygments/lexers/nix.py +++ b/pygments/lexers/nix.py @@ -29,8 +29,6 @@ class NixLexer(RegexLexer): filenames = ['*.nix'] mimetypes = ['text/x-nix'] - flags = re.MULTILINE | re.UNICODE - keywords = ['rec', 'with', 'let', 'in', 'inherit', 'assert', 'if', 'else', 'then', '...'] builtins = ['import', 'abort', 'baseNameOf', 'dirOf', 'isNull', 'builtins', diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py index 35d29a77..3609bc00 100644 --- a/pygments/lexers/perl.py +++ b/pygments/lexers/perl.py @@ -234,7 +234,7 @@ class Perl6Lexer(ExtendedRegexLexer): '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod', '*.rakutest', '*.rakudoc'] mimetypes = ['text/x-perl6', 'application/x-perl6'] - flags = re.MULTILINE | re.DOTALL | re.UNICODE + flags = re.MULTILINE | re.DOTALL PERL6_IDENTIFIER_RANGE = r"['\w:-]" diff --git a/pygments/lexers/prolog.py b/pygments/lexers/prolog.py index a9e743ce..90031a8a 100644 --- a/pygments/lexers/prolog.py +++ b/pygments/lexers/prolog.py @@ -26,8 +26,6 @@ class PrologLexer(RegexLexer): filenames = ['*.ecl', '*.prolog', '*.pro', '*.pl'] mimetypes = ['text/x-prolog'] - flags = re.UNICODE | re.MULTILINE - tokens = { 'root': [ (r'/\*', Comment.Multiline, 'nested-comment'), diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index a83bc92a..3b647fc8 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -61,8 +61,6 @@ class PythonLexer(RegexLexer): mimetypes = ['text/x-python', 'application/x-python', 'text/x-python3', 'application/x-python3'] - flags = re.MULTILINE | re.UNICODE - uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue) def innerstring_rules(ttype): @@ -214,7 +212,7 @@ class PythonLexer(RegexLexer): (r'(^[ \t]*)' # at beginning of line + possible indentation r'(match|case)\b' # a possible keyword r'(?![ \t]*(?:' # not followed by... - r'[:,;=^&|@~)\]}]|(?:' + # characters and keywords that mean this isn't + r'[:,;=^&|@~)\]}]|(?:' + # characters and keywords that mean this isn't r'|'.join(keyword.kwlist) + r')\b))', # pattern matching bygroups(Text, Keyword), 'soft-keywords-inner'), ], diff --git a/pygments/lexers/smithy.py b/pygments/lexers/smithy.py index ac30b2d5..a9b13ef8 100644 --- a/pygments/lexers/smithy.py +++ b/pygments/lexers/smithy.py @@ -27,7 +27,6 @@ class SmithyLexer(RegexLexer): filenames = ['*.smithy'] aliases = ['smithy'] - flags = re.MULTILINE | re.UNICODE unquoted = r'[A-Za-z0-9_\.#$-]+' identifier = r"[A-Za-z0-9_\.#$-]+" diff --git a/pygments/lexers/solidity.py b/pygments/lexers/solidity.py index 45918059..2b406173 100644 --- a/pygments/lexers/solidity.py +++ b/pygments/lexers/solidity.py @@ -29,8 +29,6 @@ class SolidityLexer(RegexLexer): filenames = ['*.sol'] mimetypes = [] - flags = re.MULTILINE | re.UNICODE - datatype = ( r'\b(address|bool|(?:(?:bytes|hash|int|string|uint)(?:8|16|24|32|40|48|56|64' r'|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208' diff --git a/pygments/lexers/spice.py b/pygments/lexers/spice.py index 7f234419..a151c03d 100644 --- a/pygments/lexers/spice.py +++ b/pygments/lexers/spice.py @@ -28,8 +28,6 @@ class SpiceLexer(RegexLexer): aliases = ['spice', 'spicelang'] mimetypes = ['text/x-spice'] - flags = re.MULTILINE | re.UNICODE - tokens = { 'root': [ (r'\n', Whitespace), diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index e6633663..18cd9c20 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -513,8 +513,8 @@ class TransactSqlLexer(RegexLexer): filenames = ['*.sql'] mimetypes = ['text/x-tsql'] - # Use re.UNICODE to allow non ASCII letters in names. - flags = re.IGNORECASE | re.UNICODE + flags = re.IGNORECASE + tokens = { 'root': [ (r'\s+', Whitespace), diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py index ca26f4ee..f5096ef6 100644 --- a/pygments/lexers/theorem.py +++ b/pygments/lexers/theorem.py @@ -29,7 +29,7 @@ class CoqLexer(RegexLexer): filenames = ['*.v'] mimetypes = ['text/x-coq'] - flags = re.UNICODE + flags = 0 # no re.MULTILINE keywords1 = ( # Vernacular commands @@ -391,8 +391,6 @@ class LeanLexer(RegexLexer): filenames = ['*.lean'] mimetypes = ['text/x-lean'] - flags = re.MULTILINE | re.UNICODE - tokens = { 'root': [ (r'\s+', Text), diff --git a/pygments/lexers/webmisc.py b/pygments/lexers/webmisc.py index 891fc9fc..28ab9754 100644 --- a/pygments/lexers/webmisc.py +++ b/pygments/lexers/webmisc.py @@ -111,7 +111,7 @@ class XQueryLexer(ExtendedRegexLexer): # aposattrcontentchar # x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - flags = re.DOTALL | re.MULTILINE | re.UNICODE + flags = re.DOTALL | re.MULTILINE def punctuation_root_callback(lexer, match, ctx): yield match.start(), Punctuation, match.group(1) diff --git a/pygments/util.py b/pygments/util.py index 658c82ec..8032962d 100644 --- a/pygments/util.py +++ b/pygments/util.py @@ -23,7 +23,7 @@ doctype_lookup_re = re.compile(r''' [^>]*> ''', re.DOTALL | re.MULTILINE | re.VERBOSE) tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>', - re.UNICODE | re.IGNORECASE | re.DOTALL | re.MULTILINE) + re.IGNORECASE | re.DOTALL | re.MULTILINE) xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I) diff --git a/scripts/check_sources.py b/scripts/check_sources.py index 612a7ef1..1feb1a33 100755 --- a/scripts/check_sources.py +++ b/scripts/check_sources.py @@ -34,9 +34,9 @@ def checker(*suffixes, **kwds): name_mail_re = r'[\w ]+(<.*?>)?' copyright_re = re.compile(r'^ :copyright: Copyright 2006-2022 by ' - r'the Pygments team, see AUTHORS\.$', re.UNICODE) + r'the Pygments team, see AUTHORS\.$') copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' % - (name_mail_re, name_mail_re), re.UNICODE) + (name_mail_re, name_mail_re)) is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b') misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING |