Clarify mimetype handling in HttpLexer, add test.

author: Tim Hatch <tim@timhatch.com> 2014-10-10 07:46:39 -0700
committer: Tim Hatch <tim@timhatch.com> 2014-10-10 07:46:39 -0700
commit: 8a42b5dc34e512a7fb3a9d7ce4d5cc436eccb099 (patch)
tree: 0e9b1c83c17dd77cc0333c129865f8034134ac54
parent: 9babe3a89768c713a87e68115ee39bf4fe1cc717 (diff)
download: pygments-8a42b5dc34e512a7fb3a9d7ce4d5cc436eccb099.tar.gz
2 files changed, 58 insertions, 18 deletions
diff --git a/pygments/lexers/textfmts.py b/pygments/lexers/textfmts.py
index 04222562..30712f5d 100644
--- a/pygments/lexers/textfmts.py
+++ b/pygments/lexers/textfmts.py
@@ -146,24 +146,23 @@ class HttpLexer(RegexLexer):
         offset = match.start()
         if content_type:
             from pygments.lexers import get_lexer_for_mimetype
-            try:
-                lexer = get_lexer_for_mimetype(content_type)
-            except ClassNotFound:
-                pass
-            else:
-                for idx, token, value in lexer.get_tokens_unprocessed(content):
-                    yield offset + idx, token, value
-                return
-            # Check for with just the suffix
-            content_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2', content_type)
-            try:
-                lexer = get_lexer_for_mimetype(content_type)
-            except ClassNotFound:
-                pass
-            else:
-                for idx, token, value in lexer.get_tokens_unprocessed(content):
-                    yield offset + idx, token, value
-                return
+            possible_lexer_mimetypes = [content_type]
+            if '+' in content_type:
+                # application/calendar+xml can be treated as application/xml
+                # if there's not a better match.
+                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
+                                      content_type)
+                possible_lexer_mimetypes.append(general_type)
+
+            for i in possible_lexer_mimetypes:
+                try:
+                    lexer = get_lexer_for_mimetype(i)
+                except ClassNotFound:
+                    pass
+                else:
+                    for idx, token, value in lexer.get_tokens_unprocessed(content):
+                        yield offset + idx, token, value
+                    return
         yield offset, Text, content
 
     tokens = {
diff --git a/tests/test_textfmts.py b/tests/test_textfmts.py
new file mode 100644
index 00000000..de94545a
--- /dev/null
+++ b/tests/test_textfmts.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+"""
+    Basic Tests for textfmts
+    ~~~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+
+from pygments.token import Operator, Number, Text, Token
+from pygments.lexers.textfmts import HttpLexer
+
+
+class RubyTest(unittest.TestCase):
+
+    def setUp(self):
+        self.lexer = HttpLexer()
+        self.maxDiff = None
+
+    def testApplicationXml(self):
+        fragment = u'GET / HTTP/1.0\nContent-Type: application/xml\n\n<foo>\n'
+        tokens = [
+            (Token.Name.Tag, u'<foo'),
+            (Token.Name.Tag, u'>'),
+            (Token.Text, u'\n'),
+        ]
+        self.assertEqual(
+            tokens, list(self.lexer.get_tokens(fragment))[-len(tokens):])
+
+    def testApplicationCalendarXml(self):
+        fragment = u'GET / HTTP/1.0\nContent-Type: application/calendar+xml\n\n<foo>\n'
+        tokens = [
+            (Token.Name.Tag, u'<foo'),
+            (Token.Name.Tag, u'>'),
+            (Token.Text, u'\n'),
+        ]
+        self.assertEqual(
+            tokens, list(self.lexer.get_tokens(fragment))[-len(tokens):])
+
author	Tim Hatch <tim@timhatch.com>	2014-10-10 07:46:39 -0700
committer	Tim Hatch <tim@timhatch.com>	2014-10-10 07:46:39 -0700
commit	8a42b5dc34e512a7fb3a9d7ce4d5cc436eccb099 (patch)
tree	0e9b1c83c17dd77cc0333c129865f8034134ac54
parent	9babe3a89768c713a87e68115ee39bf4fe1cc717 (diff)
download	pygments-8a42b5dc34e512a7fb3a9d7ce4d5cc436eccb099.tar.gz