summary refs log tree commit diff
path: root/compressor/parser/html5lib.py
diff options
context:
space:
mode:
Diffstat (limited to 'compressor/parser/html5lib.py')
-rw-r--r-- compressor/parser/html5lib.py 37
1 files changed, 20 insertions, 17 deletions
diff --git a/compressor/parser/html5lib.py b/compressor/parser/html5lib.py
index 7fee590..b1d0948 100644
--- a/compressor/parser/html5lib.py
+++ b/compressor/parser/html5lib.py
@@ -1,6 +1,6 @@
from __future__ import absolute_import
-from django.utils.encoding import smart_unicode
from django.core.exceptions import ImproperlyConfigured
+from django.utils.encoding import smart_text
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
@@ -15,42 +15,45 @@ class Html5LibParser(ParserBase):
self.html5lib = html5lib
def _serialize(self, elem):
- fragment = self.html5lib.treebuilders.simpletree.DocumentFragment()
- fragment.appendChild(elem)
- return self.html5lib.serialize(fragment,
- quote_attr_values=True, omit_optional_tags=False)
+ return self.html5lib.serialize(
+ elem, tree="etree", quote_attr_values=True,
+ omit_optional_tags=False, use_trailing_solidus=True,
+ )
def _find(self, *names):
- for node in self.html.childNodes:
- if node.type == 5 and node.name in names:
- yield node
+ for elem in self.html:
+ if elem.tag in names:
+ yield elem
@cached_property
def html(self):
try:
- return self.html5lib.parseFragment(self.content)
- except ImportError, err:
+ return self.html5lib.parseFragment(self.content, treebuilder="etree")
+ except ImportError as err:
raise ImproperlyConfigured("Error while importing html5lib: %s" % err)
- except Exception, err:
+ except Exception as err:
raise ParserError("Error while initializing Parser: %s" % err)
def css_elems(self):
- return self._find('style', 'link')
+ return self._find('{http://www.w3.org/1999/xhtml}link',
+ '{http://www.w3.org/1999/xhtml}style')
def js_elems(self):
- return self._find('script')
+ return self._find('{http://www.w3.org/1999/xhtml}script')
def elem_attribs(self, elem):
- return elem.attributes
+ return elem.attrib
def elem_content(self, elem):
- return elem.childNodes[0].value
+ return smart_text(elem.text)
def elem_name(self, elem):
- return elem.name
+ if '}' in elem.tag:
+ return elem.tag.split('}')[1]
+ return elem.tag
def elem_str(self, elem):
# This method serializes HTML in a way that does not pass all tests.
# However, this method is only called in tests anyway, so it doesn't
# really matter.
- return smart_unicode(self._serialize(elem))
+ return smart_text(self._serialize(elem))