summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2014-01-26 11:41:50 +0100
committer: Georg Brandl <georg@python.org> 2014-01-26 11:41:50 +0100
commit: 76830a64b219cc2fd76b7ae7de12b717375a6793 (patch)
tree: 908b6a9d75b1abe18982c0ddb728a34afdff07bf
parent: b7cfc29dd2ac8279e8689e62c83820e77a6a41f4 (diff)
parent: 5c7cf5b852b2e69bbe9b9797550ebc0fecd1c45b (diff)
download: pygments-76830a64b219cc2fd76b7ae7de12b717375a6793.tar.gz
Merged in hoelzro/pygments-main (pull request #277)
Improvements to Perl 6 detector
-rw-r--r--  pygments/lexers/agile.py  64
1 files changed, 38 insertions, 26 deletions
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py
index 89b2d2da..a49289dc 100644
--- a/pygments/lexers/agile.py
+++ b/pygments/lexers/agile.py
@@ -1021,7 +1021,7 @@ class PerlLexer(RegexLexer):
def analyse_text(text):
if shebang_matches(text, r'perl'):
return True
- if 'my $' in text:
+ if re.search('(?:my|our)\s+[$@%(]', text):
return 0.9
@@ -1940,7 +1940,7 @@ class Perl6Lexer(ExtendedRegexLexer):
mimetypes = ['text/x-perl6', 'application/x-perl6']
flags = re.MULTILINE | re.DOTALL | re.UNICODE
- PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]"
+ PERL6_IDENTIFIER_RANGE = "['a-zA-Z0-9_:-]" # if you alter this, search for a copy made of it below
PERL6_KEYWORDS = (
'BEGIN', 'CATCH', 'CHECK', 'CONTROL', 'END', 'ENTER', 'FIRST', 'INIT',
@@ -2125,12 +2125,16 @@ class Perl6Lexer(ExtendedRegexLexer):
end_pos = next_close_pos
+ if end_pos < 0: # if we didn't find a closer, just highlight the
+ # rest of the text in this class
+ end_pos = len(text)
+
if adverbs is not None and re.search(r':to\b', adverbs):
heredoc_terminator = text[match.start('delimiter') + n_chars : end_pos]
- end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) + r'\s*$', text[ match.end('delimiter') : ], re.MULTILINE)
+ end_heredoc = re.search(r'^\s*' + re.escape(heredoc_terminator) + r'\s*$', text[ end_pos : ], re.MULTILINE)
if end_heredoc:
- end_pos = match.end('delimiter') + end_heredoc.end()
+ end_pos += end_heredoc.end()
else:
end_pos = len(text)
@@ -2207,7 +2211,7 @@ class Perl6Lexer(ExtendedRegexLexer):
(r'(?<=~~)\s*/(?:\\\\|\\/|.)*?/', String.Regex),
(r'(?<=[=(,])\s*/(?:\\\\|\\/|.)*?/', String.Regex),
(r'm\w+(?=\()', Name),
- (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String.Regex)),
+ (r'(?:m|ms|rx)\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z_:\s])(?P=first_char)*)', brackets_callback(String.Regex)),
(r'(?:s|ss|tr)\s*(?::[\w\s:]+)?\s*/(?:\\\\|\\/|.)*?/(?:\\\\|\\/|.)*?/', String.Regex),
(r'<[^\s=].*?\S>', String),
(_build_word_match(PERL6_OPERATORS), Operator),
@@ -2245,9 +2249,6 @@ class Perl6Lexer(ExtendedRegexLexer):
}
def analyse_text(text):
- # disabled for now; the lexer is not bug-free and will loop sometimes,
- # so let's be sure to use it only for "real" Perl 6 code.
- return False
def strip_pod(lines):
in_pod = False
stripped_lines = []
@@ -2262,30 +2263,41 @@ class Perl6Lexer(ExtendedRegexLexer):
return stripped_lines
+ # XXX handle block comments
lines = text.splitlines()
lines = strip_pod(lines)
text = '\n'.join(lines)
- if shebang_matches(text, r'perl6|rakudo|niecza'):
+ if shebang_matches(text, r'perl6|rakudo|niecza|pugs'):
return True
- if 'use v6' in text:
- return 0.91 # 0.01 greater than Perl says for 'my $'
- if re.search(r'[$@%]\*[A-Z]+', text): # Perl 6-style globals ($*OS)
- return 0.91
- if re.search(r'[$@%]\?[A-Z]+', text): # Perl 6 compiler variables ($?PACKAGE)
- return 0.91
- if re.search(r'[$@%][!.][A-Za-z0-9_-]+', text): # Perl 6 member variables
- return 0.91
-
- for line in text.splitlines():
- if re.match(r'\s*(?:my|our)?\s*module', line): # module declarations
- return 0.91
- if re.match(r'\s*(?:my|our)?\s*role', line): # role declarations
- return 0.91
- if re.match(r'\s*(?:my|our)?\s*class\b', line): # class declarations
- return 0.91
- return False
+ saw_perl_decl = False
+ rating = False
+
+ # check for my/our/has declarations
+ # copied PERL6_IDENTIFIER_RANGE from above; not happy about that
+ if re.search("(?:my|our|has)\s+(?:['a-zA-Z0-9_:-]+\s+)?[$@%&(]", text):
+ rating = 0.8
+ saw_perl_decl = True
+
+ for line in lines:
+ line = re.sub('#.*', '', line)
+ if re.match('^\s*$', line):
+ continue
+
+ # match v6; use v6; use v6.0; use v6.0.0;
+ if re.match('^\s*(?:use\s+)?v6(?:\.\d(?:\.\d)?)?;', line):
+ return True
+ # match class, module, role, enum, grammar declarations
+ class_decl = re.match('^\s*(?:(?P<scope>my|our)\s+)?(?:module|class|role|enum|grammar)', line)
+ if class_decl:
+ if saw_perl_decl or class_decl.group('scope') is not None:
+ return True
+ rating = 0.05
+ continue
+ break
+
+ return rating
def __init__(self, **options):
super(Perl6Lexer, self).__init__(**options)