diff options
author | Aron Griffis <aron@arongriffis.com> | 2013-09-19 21:07:03 -0400 |
---|---|---|
committer | Aron Griffis <aron@arongriffis.com> | 2013-09-19 21:07:03 -0400 |
commit | dd8aa7cac51dc8f0f2c273a0c2082fa8ed465193 (patch) | |
tree | 605861cf4cd995749ab97b3739226b6fbde68d37 | |
parent | 68e926a0dba69b96f908226bda12fa9d78f5aa1e (diff) | |
download | smartypants-dd8aa7cac51dc8f0f2c273a0c2082fa8ed465193.tar.gz |
Fix the primitive tokenizer to handle HTML comments that contain tags. Otherwise, the tokenizer generates completely broken HTML by turning the end-comment marker into an en-dash.
-rwxr-xr-x | smartypants.py | 2 | ||||
-rw-r--r-- | tests/test.py | 7 |
2 files changed, 8 insertions, 1 deletions
```diff
diff --git a/smartypants.py b/smartypants.py
index aec885b..f8d56f3 100755
--- a/smartypants.py
+++ b/smartypants.py
@@ -709,7 +709,7 @@ def _tokenize(text):
 
     tokens = []
 
-    tag_soup = re.compile('([^<]*)(<[^>]*>)')
+    tag_soup = re.compile(r"""(?s)([^<]*)(<!--.*?--\s*>|<[^>]*>)""")
 
     token_match = tag_soup.search(text)
 
diff --git a/tests/test.py b/tests/test.py
index ffd20ac..d0622de 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -84,6 +84,13 @@ document.write('<a href="' + href + '">' + linktext + "</a>");
              "is python code.</p>")
         self.assertEqual(T, E)
 
+    def test_comments(self):
+
+        self.assertEqual(sp("--"), "—")
+        self.assertEqual(sp("-->"), "—>")
+        self.assertEqual(sp("<!-- comment -->"), "<!-- comment -->")
+        self.assertEqual(sp("<!-- <li>Fee-fi-of-fum</li> -->"), "<!-- <li>Fee-fi-of-fum</li> -->")
+
     def test_ordinal_numbers(self):
 
         self.assertEqual(sp("21st century"), "21st century") # no effect.
```