merge with timjb/pygments-main (Agda lexers), pull request #203

author: Georg Brandl <georg@python.org> 2013-05-20 07:37:47 +0200
committer: Georg Brandl <georg@python.org> 2013-05-20 07:37:47 +0200
commit: fcc0147c89efa90a992bf9a5b8560ef72eaba1eb (patch)
tree: 6bb8b9a941f4835b2b9ce31faf8a4d35377f40e6
parent: 4f8ffd30e963bb4a7c8691a32c91279e114ecf8b (diff)
parent: ac957840bdb18b1d4fb8202228cae6b00dd23a52 (diff)
download: pygments-fcc0147c89efa90a992bf9a5b8560ef72eaba1eb.tar.gz
7 files changed, 261 insertions, 21 deletions
diff --git a/AUTHORS b/AUTHORS
index 7e38aea0..1c4a9992 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -16,6 +16,7 @@ Other contributors, listed alphabetically, are:
 * Stefan Matthias Aust -- Smalltalk lexer
 * Ben Bangert -- Mako lexers
 * Max Battcher -- Darcs patch lexer
+* Tim Baumann -- (Literate) Agda lexer
 * Paul Baumgart, 280 North, Inc. -- Objective-J lexer
 * Michael Bayer -- Myghty lexers
 * John Benediktsson -- Factor lexer
diff --git a/CHANGES b/CHANGES
index cb967648..9b2cafc5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -20,6 +20,7 @@ Version 1.7
   * EBNF (PR#193)
   * Igor Pro (PR#172)
   * Rexx (PR#199)
+  * Agda and Literate Agda (PR#203)
 
 - Pygments will now recognize "vim" modelines when guessing the lexer for
   a file based on content (PR#118).
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index d377a5a8..969bdba5 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -18,6 +18,7 @@ LEXERS = {
     'ActionScript3Lexer': ('pygments.lexers.web', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')),
     'ActionScriptLexer': ('pygments.lexers.web', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3')),
     'AdaLexer': ('pygments.lexers.compiled', 'Ada', ('ada', 'ada95ada2005'), ('*.adb', '*.ads', '*.ada'), ('text/x-ada',)),
+    'AgdaLexer': ('pygments.lexers.functional', 'Agda', ('agda',), ('*.agda',), ('text/x-agda',)),
     'AntlrActionScriptLexer': ('pygments.lexers.parsers', 'ANTLR With ActionScript Target', ('antlr-as', 'antlr-actionscript'), ('*.G', '*.g'), ()),
     'AntlrCSharpLexer': ('pygments.lexers.parsers', 'ANTLR With C# Target', ('antlr-csharp', 'antlr-c#'), ('*.G', '*.g'), ()),
     'AntlrCppLexer': ('pygments.lexers.parsers', 'ANTLR With CPP Target', ('antlr-cpp',), ('*.G', '*.g'), ()),
@@ -163,6 +164,7 @@ LEXERS = {
     'LassoLexer': ('pygments.lexers.web', 'Lasso', ('lasso', 'lassoscript'), ('*.lasso', '*.lasso[89]'), ('text/x-lasso',)),
     'LassoXmlLexer': ('pygments.lexers.templates', 'XML+Lasso', ('xml+lasso',), (), ('application/xml+lasso',)),
     'LighttpdConfLexer': ('pygments.lexers.text', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)),
+    'LiterateAgdaLexer': ('pygments.lexers.functional', 'Literate Agda', ('lagda', 'literate-agda'), ('*.lagda',), ('text/x-literate-agda',)),
     'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell', 'lhaskell'), ('*.lhs',), ('text/x-literate-haskell',)),
     'LiveScriptLexer': ('pygments.lexers.web', 'LiveScript', ('live-script', 'livescript'), ('*.ls',), ('text/livescript',)),
     'LlvmLexer': ('pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)),
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index 77fe4723..770e6bd9 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -16,9 +16,13 @@ from pygments.token import Text, Comment, Operator, Keyword, Name, \
      String, Number, Punctuation, Literal, Generic, Error
 
 __all__ = ['RacketLexer', 'SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
-           'LiterateHaskellLexer', 'SMLLexer', 'OcamlLexer', 'ErlangLexer',
-           'ErlangShellLexer', 'OpaLexer', 'CoqLexer', 'NewLispLexer',
-           'ElixirLexer', 'ElixirConsoleLexer', 'KokaLexer']
+           'AgdaLexer', 'LiterateHaskellLexer', 'LiterateAgdaLexer',
+           'SMLLexer', 'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer',
+           'OpaLexer', 'CoqLexer', 'NewLispLexer', 'ElixirLexer',
+           'ElixirConsoleLexer', 'KokaLexer']
+
+
+line_re = re.compile('.*?\n')
 
 
 class RacketLexer(RegexLexer):
@@ -981,6 +985,8 @@ class HaskellLexer(RegexLexer):
             (r'\(', Punctuation, ('funclist', 'funclist')),
             (r'\)', Punctuation, '#pop:2'),
         ],
+        # NOTE: the next four states are shared in the AgdaLexer; make sure
+        # any change is compatible with Agda as well or copy over and change
         'comment': [
             # Multiline Comments
             (r'[^-{}]+', Comment.Multiline),
@@ -1011,12 +1017,78 @@ class HaskellLexer(RegexLexer):
     }
 
 
-line_re = re.compile('.*?\n')
-bird_re = re.compile(r'(>[ \t]*)(.*\n)')
+class AgdaLexer(RegexLexer):
+    """
+    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
+    dependently typed functional programming language and proof assistant.
 
-class LiterateHaskellLexer(Lexer):
+    *New in Pygments 1.7.*
     """
-    For Literate Haskell (Bird-style or LaTeX) source.
+
+    name = 'Agda'
+    aliases = ['agda']
+    filenames = ['*.agda']
+    mimetypes = ['text/x-agda']
+
+    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
+                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
+                'infixl', 'infixr', 'let', 'open', 'pattern', 'primitive',
+                'private', 'mutual', 'quote', 'quoteGoal', 'quoteTerm',
+                'record', 'syntax', 'rewrite', 'unquote', 'using', 'where',
+                'with']
+
+    tokens = {
+        'root': [
+            # Declaration
+            (r'^(\s*)([^\s\(\)\{\}]+)(\s*)(:)(\s*)',
+             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
+            # Comments
+            (r'--(?![!#$%&*+./<=>?@\^|_~:\\]).*?$', Comment.Single),
+            (r'{-', Comment.Multiline, 'comment'),
+            # Holes
+            (r'{!', Comment.Directive, 'hole'),
+            # Lexemes:
+            #  Identifiers
+            (ur'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
+            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
+            (r'\b(Set|Prop)\b', Keyword.Type),
+            #  Special Symbols
+            (r'(\(|\)|\{|\})', Operator),
+            (ur'(\.{1,3}|\||[\u039B]|[\u2200]|[\u2192]|:|=|->)', Operator.Word),
+            #  Numbers
+            (r'\d+[eE][+-]?\d+', Number.Float),
+            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
+            (r'0[xX][\da-fA-F]+', Number.Hex),
+            (r'\d+', Number.Integer),
+            # Strings
+            (r"'", String.Char, 'character'),
+            (r'"', String, 'string'),
+            (r'[^\s\(\)\{\}]+', Text),
+            (r'\s+?', Text),  # Whitespace
+        ],
+        'hole': [
+            # Holes
+            (r'[^!{}]+', Comment.Directive),
+            (r'{!', Comment.Directive, '#push'),
+            (r'!}', Comment.Directive, '#pop'),
+            (r'[!{}]', Comment.Directive),
+        ],
+        'module': [
+            (r'{-', Comment.Multiline, 'comment'),
+            (r'[a-zA-Z][a-zA-Z0-9_.]*', Name, '#pop'),
+            (r'[^a-zA-Z]*', Text)
+        ],
+        'comment': HaskellLexer.tokens['comment'],
+        'character': HaskellLexer.tokens['character'],
+        'string': HaskellLexer.tokens['string'],
+        'escape': HaskellLexer.tokens['escape']
+    }
+
+
+class LiterateLexer(Lexer):
+    """
+    Base class for lexers of literate file formats based on LaTeX or Bird-style
+    (prefixing each code line with ">").
 
     Additional options accepted:
 
@@ -1024,17 +1096,15 @@ class LiterateHaskellLexer(Lexer):
         If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
         is autodetected: if the first non-whitespace character in the source
         is a backslash or percent character, LaTeX is assumed, else Bird.
-
-    *New in Pygments 0.9.*
     """
-    name = 'Literate Haskell'
-    aliases = ['lhs', 'literate-haskell', 'lhaskell']
-    filenames = ['*.lhs']
-    mimetypes = ['text/x-literate-haskell']
 
-    def get_tokens_unprocessed(self, text):
-        hslexer = HaskellLexer(**self.options)
+    bird_re = re.compile(r'(>[ \t]*)(.*\n)')
 
+    def __init__(self, baselexer, **options):
+        self.baselexer = baselexer
+        Lexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text):
         style = self.options.get('litstyle')
         if style is None:
             style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
@@ -1045,7 +1115,7 @@ class LiterateHaskellLexer(Lexer):
             # bird-style
             for match in line_re.finditer(text):
                 line = match.group()
-                m = bird_re.match(line)
+                m = self.bird_re.match(line)
                 if m:
                     insertions.append((len(code),
                                        [(0, Comment.Special, m.group(1))]))
@@ -1056,7 +1126,6 @@ class LiterateHaskellLexer(Lexer):
             # latex-style
             from pygments.lexers.text import TexLexer
             lxlexer = TexLexer(**self.options)
-
             codelines = 0
             latex = ''
             for match in line_re.finditer(text):
@@ -1077,10 +1146,56 @@ class LiterateHaskellLexer(Lexer):
                     latex += line
             insertions.append((len(code),
                                list(lxlexer.get_tokens_unprocessed(latex))))
-        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
+        for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
             yield item
 
 
+class LiterateHaskellLexer(LiterateLexer):
+    """
+    For Literate Haskell (Bird-style or LaTeX) source.
+
+    Additional options accepted:
+
+    `litstyle`
+        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
+        is autodetected: if the first non-whitespace character in the source
+        is a backslash or percent character, LaTeX is assumed, else Bird.
+
+    *New in Pygments 0.9.*
+    """
+    name = 'Literate Haskell'
+    aliases = ['lhs', 'literate-haskell', 'lhaskell']
+    filenames = ['*.lhs']
+    mimetypes = ['text/x-literate-haskell']
+
+    def __init__(self, **options):
+        hslexer = HaskellLexer(**options)
+        LiterateLexer.__init__(self, hslexer, **options)
+
+
+class LiterateAgdaLexer(LiterateLexer):
+    """
+    For Literate Agda source.
+
+    Additional options accepted:
+
+    `litstyle`
+        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
+        is autodetected: if the first non-whitespace character in the source
+        is a backslash or percent character, LaTeX is assumed, else Bird.
+
+    *New in Pygments 1.7.*
+    """
+    name = 'Literate Agda'
+    aliases = ['lagda', 'literate-agda']
+    filenames = ['*.lagda']
+    mimetypes = ['text/x-literate-agda']
+
+    def __init__(self, **options):
+        agdalexer = AgdaLexer(**options)
+        LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
+
+
 class SMLLexer(RegexLexer):
     """
     For the Standard ML language.
diff --git a/tests/examplefiles/example.lagda b/tests/examplefiles/example.lagda
new file mode 100644
index 00000000..b5476fa0
--- /dev/null
+++ b/tests/examplefiles/example.lagda
@@ -0,0 +1,19 @@
+\documentclass{article}
+% this is a LaTeX comment
+\usepackage{agda}
+
+\begin{document}
+
+Here's how you can define \emph{RGB} colors in Agda:
+
+\begin{code}
+module example where
+
+open import Data.Fin
+open import Data.Nat
+
+data Color : Set where
+    RGB : Fin 256 → Fin 256 → Fin 256 → Color
+\end{code}
+
+\end{document}
+\ No newline at end of file
diff --git a/tests/examplefiles/test.agda b/tests/examplefiles/test.agda
new file mode 100644
index 00000000..d930a77b
--- /dev/null
+++ b/tests/examplefiles/test.agda
@@ -0,0 +1,102 @@
+-- An Agda example file
+
+module test where
+
+open import Coinduction
+open import Data.Bool
+open import {- pointless comment between import and module name -} Data.Char
+open import Data.Nat
+open import Data.Nat.Properties
+open import Data.String
+open import Data.List hiding ([_])
+open import Data.Vec hiding ([_])
+open import Relation.Nullary.Core
+open import Relation.Binary.PropositionalEquality using (_≡_; refl; cong; trans; inspect; [_])
+
+open SemiringSolver
+
+{- this is a {- nested -} comment -}
+
+-- Factorial
+_! : ℕ → ℕ
+0 ! = 1
+(suc n) ! = (suc n) * n !
+
+-- The binomial coefficient
+_choose_ : ℕ → ℕ → ℕ
+_ choose 0 = 1
+0 choose _ = 0
+(suc n) choose (suc m) = (n choose m) + (n choose (suc m)) -- Pascal's rule
+
+choose-too-many : ∀ n m → n ≤ m → n choose (suc m) ≡ 0
+choose-too-many .0 m z≤n = refl
+choose-too-many (suc n) (suc m) (s≤s le) with n choose (suc m) | choose-too-many n m le | n choose (suc (suc m)) | choose-too-many n (suc m) (≤-step le)
+... | .0 | refl | .0 | refl = refl
+
+_++'_ : ∀ {a n m} {A : Set a} → Vec A n → Vec A m → Vec A (m + n)
+_++'_ {_} {n} {m} v₁ v₂ rewrite solve 2 (λ a b → b :+ a := a :+ b) refl n m = v₁ Data.Vec.++ v₂
+
+++'-test : (1 ∷ 2 ∷ 3 ∷ []) ++' (4 ∷ 5 ∷ []) ≡ (1 ∷ 2 ∷ 3 ∷ 4 ∷ 5 ∷ [])
+++'-test = refl
+
+data Coℕ : Set where
+  co0   : Coℕ
+  cosuc : ∞ Coℕ → Coℕ
+
+nanana : Coℕ
+nanana = let two = ♯ cosuc (♯ (cosuc (♯ co0))) in cosuc two
+
+abstract
+  data VacuumCleaner : Set where
+    Roomba : VacuumCleaner
+
+pointlessLemmaAboutBoolFunctions : (f : Bool → Bool) → f (f (f true)) ≡ f true
+pointlessLemmaAboutBoolFunctions f with f true | inspect f true
+... | true  | [ eq₁ ] = trans (cong f eq₁) eq₁
+... | false | [ eq₁ ] with f false | inspect f false
+... | true  | _       = eq₁
+... | false | [ eq₂ ] = eq₂
+
+mutual
+  isEven : ℕ → Bool
+  isEven 0       = true
+  isEven (suc n) = not (isOdd n)
+
+  isOdd : ℕ → Bool
+  isOdd 0       = false
+  isOdd (suc n) = not (isEven n)
+
+foo : String
+foo = "Hello World!"
+
+nl : Char
+nl = '\n'
+
+private
+  intersperseString : Char → List String → String
+  intersperseString c []       = ""
+  intersperseString c (x ∷ xs) = Data.List.foldl (λ a b → a Data.String.++ Data.String.fromList (c ∷ []) Data.String.++ b) x xs
+
+baz : String
+baz = intersperseString nl (Data.List.replicate 5 foo)
+
+postulate
+  Float : Set
+
+{-# BUILTIN FLOAT Float  #-}
+
+pi : Float
+pi = 3.141593
+
+-- Astronomical unit
+au : Float
+au = 1.496e11 -- m
+
+plusFloat : Float → Float → Float
+plusFloat a b = {! !}
+
+record Subset (A : Set) (P : A → Set) : Set where
+  constructor _#_
+  field
+    elem   : A
+    .proof : P elem
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index 00dc26f0..18ed8d64 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -92,9 +92,9 @@ def test_lexer_options():
         if cls.__name__ not in (
             'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
             'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
-            'BashSessionLexer', 'LiterateHaskellLexer', 'PostgresConsoleLexer',
-            'ElixirConsoleLexer', 'JuliaConsoleLexer', 'RobotFrameworkLexer',
-            'DylanConsoleLexer', 'ShellSessionLexer'):
+            'BashSessionLexer', 'LiterateHaskellLexer', 'LiterateAgdaLexer',
+            'PostgresConsoleLexer', 'ElixirConsoleLexer', 'JuliaConsoleLexer',
+            'RobotFrameworkLexer', 'DylanConsoleLexer', 'ShellSessionLexer'):
             inst = cls(ensurenl=False)
             ensure(inst.get_tokens('a\nb'), 'a\nb')
             inst = cls(ensurenl=False, stripall=True)
author	Georg Brandl <georg@python.org>	2013-05-20 07:37:47 +0200
committer	Georg Brandl <georg@python.org>	2013-05-20 07:37:47 +0200
commit	fcc0147c89efa90a992bf9a5b8560ef72eaba1eb (patch)
tree	6bb8b9a941f4835b2b9ce31faf8a4d35377f40e6
parent	4f8ffd30e963bb4a7c8691a32c91279e114ecf8b (diff)
parent	ac957840bdb18b1d4fb8202228cae6b00dd23a52 (diff)
download	pygments-fcc0147c89efa90a992bf9a5b8560ef72eaba1eb.tar.gz