fix tag regexp to match quoted groups correctly

Fixed an issue in the lexer where the regexp used to match tags did not correctly interpret quoted sections individually. While this parsing issue still produced the same expected tag structure later on, the mishandling of quoted sections could also crash the regexp engine if a tag had a large number of quotes within its quoted sections.

Fixes: #366
Change-Id: I74e0d71ff7f419970711a7cd51adcf1bb90a44c0
author: Mike Bayer <mike_mp@zzzcomputing.com> 2022-08-29 12:28:52 -0400
committer: Mike Bayer <mike_mp@zzzcomputing.com> 2022-08-29 12:31:58 -0400
commit: 925760291d6efec64fda6e9dd1fd9cfbd5be068c (patch)
tree: 9ebd30083f99a80c60766ca61fd546a4de71db41
parent: 7c5b28ac47755598e8c5bdfc995eaf220132e672 (diff)
download: mako-925760291d6efec64fda6e9dd1fd9cfbd5be068c.tar.gz
3 files changed, 34 insertions, 8 deletions
diff --git a/doc/build/unreleased/366.rst b/doc/build/unreleased/366.rst
new file mode 100644
index 0000000..27b0278
--- /dev/null
+++ b/doc/build/unreleased/366.rst
@@ -0,0 +1,9 @@
+.. change::
+    :tags: bug, lexer
+    :tickets: 366
+
+    Fixed issue in lexer where the regexp used to match tags would not
+    correctly interpret quoted sections individually. While this parsing issue
+    still produced the same expected tag structure later on, the mis-handling
+    of quoted sections was also subject to a regexp crash if a tag had a large
+    number of quotes within its quoted sections.
+\ No newline at end of file
diff --git a/mako/lexer.py b/mako/lexer.py
index bfcf286..77a2483 100644
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -272,20 +272,24 @@ class Lexer:
         return self.template
 
     def match_tag_start(self):
-        match = self.match(
-            r"""
+        reg = r"""
             \<%     # opening tag
 
             ([\w\.\:]+)   # keyword
 
-            ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*)  # attrname, = \
+            ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*)  # attrname, = \
                                                #        sign, string expression
+                                               # comma is for backwards compat
+                                               # identified in #366
 
             \s*     # more whitespace
 
             (/)?>   # closing
 
-            """,
+        """
+
+        match = self.match(
+            reg,
             re.I | re.S | re.X,
         )
 
diff --git a/test/test_lexer.py b/test/test_lexer.py
index 255c128..a7b6fe3 100644
--- a/test/test_lexer.py
+++ b/test/test_lexer.py
@@ -1,5 +1,7 @@
 import re
 
+import pytest
+
 from mako import compat
 from mako import exceptions
 from mako import parsetree
@@ -146,6 +148,10 @@ class LexerTest(TemplateTest):
         """
         assert_raises(exceptions.CompileException, Lexer(template).parse)
 
+    def test_tag_many_quotes(self):
+        template = "<%0" + '"' * 3000
+        assert_raises(exceptions.SyntaxException, Lexer(template).parse)
+
     def test_unmatched_tag(self):
         template = """
         <%namespace name="bar">
@@ -432,9 +438,16 @@ class LexerTest(TemplateTest):
             ),
         )
 
-    def test_pagetag(self):
-        template = """
-            <%page cached="True", args="a, b"/>
+    @pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)])
+    def test_pagetag(self, comma, numchars):
+        # note that the comma here looks like:
+        # <%page cached="True", args="a, b"/>
+        # that's what this test has looked like for decades, however, the
+        # comma there is not actually the right syntax.  When issue #366
+        # was fixed, the reg was altered to accommodate for this comma to allow
+        # backwards compat
+        template = f"""
+            <%page cached="True"{comma} args="a, b"/>
 
             some template
         """
@@ -453,7 +466,7 @@ class LexerTest(TemplateTest):
 
             some template
         """,
-                        (2, 48),
+                        (2, numchars),
                     ),
                 ],
             ),
author	Mike Bayer <mike_mp@zzzcomputing.com>	2022-08-29 12:28:52 -0400
committer	Mike Bayer <mike_mp@zzzcomputing.com>	2022-08-29 12:31:58 -0400
commit	925760291d6efec64fda6e9dd1fd9cfbd5be068c (patch)
tree	9ebd30083f99a80c60766ca61fd546a4de71db41
parent	7c5b28ac47755598e8c5bdfc995eaf220132e672 (diff)
download	mako-925760291d6efec64fda6e9dd1fd9cfbd5be068c.tar.gz