diff options
author | Georg Brandl <georg@python.org> | 2014-01-26 11:41:50 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-01-26 11:41:50 +0100 |
commit | 76830a64b219cc2fd76b7ae7de12b717375a6793 (patch) | |
tree | 908b6a9d75b1abe18982c0ddb728a34afdff07bf | |
parent | b7cfc29dd2ac8279e8689e62c83820e77a6a41f4 (diff) | |
parent | 5c7cf5b852b2e69bbe9b9797550ebc0fecd1c45b (diff) | |
download | pygments-76830a64b219cc2fd76b7ae7de12b717375a6793.tar.gz |
Merged in hoelzro/pygments-main (pull request #277)
Improvements to Perl 6 detector
-rw-r--r-- | pygments/lexers/agile.py | 64 |
1 files changed, 38 insertions, 26 deletions
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 89b2d2da..a49289dc 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -1021,7 +1021,7 @@ class PerlLexer(RegexLexer): def analyse_text(text): if shebang_matches(text, r'perl'): return True - if 'my $' in text: + if re.search('(?:my|our)\s+[$@%(]', text): return 0.9 @@ -1940,7 +1940,7 @@ class Perl6Lexer(ExtendedRegexLexer): mimetypes = ['text/x-perl6', 'application/x-perl6'] flags = re.MULTILINE | re.DOTALL | re.UNICODE - PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" + PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" # if you alter this, search for a copy made of it below PERL6_KEYWORDS = ( 'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT', @@ -2125,12 +2125,16 @@ class Perl6Lexer(ExtendedRegexLexer): end_pos = next_close_pos + if end_pos < 0: # if we didn't find a closer, just highlight the + # rest of the text in this class + end_pos = len(text) + if adverbs is not None and re.search(r':to\b', adverbs): heredoc_terminator = text[match.start('delimiter') + n_chars : end_pos] - end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) + r'\s*$', text[ match.end('delimiter') : ], re.MULTILINE) + end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) + r'\s*$', text[ end_pos : ], re.MULTILINE) if end_heredoc: - end_pos = match.end('delimiter') + end_heredoc.end() + end_pos += end_heredoc.end() else: end_pos = len(text) @@ -2207,7 +2211,7 @@ class Perl6Lexer(ExtendedRegexLexer): (r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex), (r'm\w+(?=\()', Name), - (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String.Regex)), + (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z_:\s])(?P=first_char)*)', brackets_callback(String.Regex)), (r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex), (r'<[^\s=].*?\S>', String), (_build_word_match(PERL6_OPERATORS), Operator), @@ -2245,9 +2249,6 @@ class Perl6Lexer(ExtendedRegexLexer): } def analyse_text(text): - # disabled for now; the lexer is not bug-free and will loop sometimes, - # so let's be sure to use it only for "real" Perl 6 code. - return False def strip_pod(lines): in_pod = False stripped_lines = [] @@ -2262,30 +2263,41 @@ class Perl6Lexer(ExtendedRegexLexer): return stripped_lines + # XXX handle block comments lines = text.splitlines() lines = strip_pod(lines) text = '\n'.join(lines) - if shebang_matches(text, r'perl6|rakudo|niecza'): + if shebang_matches(text, r'perl6|rakudo|niecza|pugs'): return True - if 'use v6' in text: - return 0.91 # 0.01 greater than Perl says for 'my $' - if re.search(r'[$@%]\*[A-Z]+', text): # Perl 6-style globals ($*OS) - return 0.91 - if re.search(r'[$@%]\?[A-Z]+', text): # Perl 6 compiler variables ($?PACKAGE) - return 0.91 - if re.search(r'[$@%][!.][A-Za-z0-9_-]+', text): # Perl 6 member variables - return 0.91 - - for line in text.splitlines(): - if re.match(r'\s*(?:my|our)?\s*module', line): # module declarations - return 0.91 - if re.match(r'\s*(?:my|our)?\s*role', line): # role declarations - return 0.91 - if re.match(r'\s*(?:my|our)?\s*class\b', line): # class declarations - return 0.91 - return False + saw_perl_decl = False + rating = False + + # check for my/our/has declarations + # copied PERL6_IDENTIFIER_RANGE from above; not happy about that + if re.search("(?:my|our|has)\s+(?:['a-zA-Z0-9_:-]+\s+)?[$@%&(]", text): + rating = 0.8 + saw_perl_decl = True + + for line in lines: + line = re.sub('#.*', '', line) + if re.match('^\s*$', line): + continue + + # match v6; use v6; use v6.0; use v6.0.0; + if re.match('^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line): + return True + # match class, module, role, enum, grammar declarations + class_decl = re.match('^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line) + if class_decl: + if saw_perl_decl or class_decl.group('scope') is not None: + return True + rating = 0.05 + continue + break + + return rating def __init__(self, **options): super(Perl6Lexer, self).__init__(**options) |