summaryrefslogtreecommitdiff
path: root/compressor/parser/lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'compressor/parser/lxml.py')
-rw-r--r--compressor/parser/lxml.py54
1 files changed, 39 insertions, 15 deletions
diff --git a/compressor/parser/lxml.py b/compressor/parser/lxml.py
index 7bbb561..64a8fcb 100644
--- a/compressor/parser/lxml.py
+++ b/compressor/parser/lxml.py
@@ -1,6 +1,8 @@
-from __future__ import absolute_import
+from __future__ import absolute_import, unicode_literals
+
from django.core.exceptions import ImproperlyConfigured
-from django.utils.encoding import smart_unicode
+from django.utils import six
+from django.utils.encoding import smart_text
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
@@ -8,28 +10,50 @@ from compressor.utils.decorators import cached_property
class LxmlParser(ParserBase):
-
+ """
+ LxmlParser will use `lxml.html` parser to parse rendered contents of
+ {% compress %} tag. Under python 2 it will also try to use beautiful
+ soup parser in case of any problems with encoding.
+ """
def __init__(self, content):
try:
- from lxml.html import fromstring, soupparser
+ from lxml.html import fromstring
from lxml.etree import tostring
- self.fromstring = fromstring
- self.soupparser = soupparser
- self.tostring = tostring
- except ImportError, err:
+ except ImportError as err:
raise ImproperlyConfigured("Error while importing lxml: %s" % err)
- except Exception, err:
- raise ParserError("Error while initializing Parser: %s" % err)
+ except Exception as err:
+ raise ParserError("Error while initializing parser: %s" % err)
+
+ if not six.PY3:
+ # soupparser uses Beautiful Soup 3 which does not run on python 3.x
+ try:
+ from lxml.html import soupparser
+ except ImportError as err:
+ soupparser = None
+ except Exception as err:
+ raise ParserError("Error while initializing parser: %s" % err)
+ else:
+ soupparser = None
+
+ self.soupparser = soupparser
+ self.fromstring = fromstring
+ self.tostring = tostring
super(LxmlParser, self).__init__(content)
@cached_property
def tree(self):
+ """
+ Document tree.
+ """
content = '<root>%s</root>' % self.content
tree = self.fromstring(content)
try:
- self.tostring(tree, encoding=unicode)
+ self.tostring(tree, encoding=six.text_type)
except UnicodeDecodeError:
- tree = self.soupparser.fromstring(content)
+ if self.soupparser: # use soup parser on python 2
+ tree = self.soupparser.fromstring(content)
+ else: # raise an error on python 3
+ raise
return tree
def css_elems(self):
@@ -43,14 +67,14 @@ class LxmlParser(ParserBase):
return elem.attrib
def elem_content(self, elem):
- return smart_unicode(elem.text)
+ return smart_text(elem.text)
def elem_name(self, elem):
return elem.tag
def elem_str(self, elem):
- elem_as_string = smart_unicode(
- self.tostring(elem, method='html', encoding=unicode))
+ elem_as_string = smart_text(
+ self.tostring(elem, method='html', encoding=six.text_type))
if elem.tag == 'link':
# This makes testcases happy
return elem_as_string.replace('>', ' />')