import unittest, sys from lxml.tests.common_imports import make_doctest from lxml.etree import LIBXML_VERSION import lxml.html from lxml.html.clean import Cleaner, clean_html class CleanerTest(unittest.TestCase): def test_allow_tags(self): html = """
some text
| hello | world |
| hello | world |
Cyan
""" safe_attrs=set(lxml.html.defs.safe_attrs) safe_attrs.add('style') cleaner = Cleaner( safe_attrs_only=True, safe_attrs=safe_attrs) result = cleaner.clean_html(html) self.assertEqual(html, result) def test_safe_attrs_excluded(self): html = """Cyan
""" expected = """Cyan
""" safe_attrs=set() cleaner = Cleaner( safe_attrs_only=True, safe_attrs=safe_attrs) result = cleaner.clean_html(html) self.assertEqual(expected, result) def test_clean_invalid_root_tag(self): # only testing that cleaning with invalid root tags works at all s = lxml.html.fromstring('parent