1 files changed, 39 insertions, 15 deletions
diff --git a/compressor/parser/lxml.py b/compressor/parser/lxml.py
index 7bbb561..64a8fcb 100644
--- a/compressor/parser/lxml.py
+++ b/compressor/parser/lxml.py
@@ -1,6 +1,8 @@
-from __future__ import absolute_import
+from __future__ import absolute_import, unicode_literals
+
 from django.core.exceptions import ImproperlyConfigured
-from django.utils.encoding import smart_unicode
+from django.utils import six
+from django.utils.encoding import smart_text
 
 from compressor.exceptions import ParserError
 from compressor.parser import ParserBase
@@ -8,28 +10,50 @@ from compressor.utils.decorators import cached_property
 
 
 class LxmlParser(ParserBase):
-
+    """
+    LxmlParser uses the `lxml.html` parser to parse the rendered contents of
+    the {% compress %} tag. Under Python 2 it also tries to fall back to the
+    Beautiful Soup parser when encoding problems occur.
+    """
     def __init__(self, content):
         try:
-            from lxml.html import fromstring, soupparser
+            from lxml.html import fromstring
             from lxml.etree import tostring
-            self.fromstring = fromstring
-            self.soupparser = soupparser
-            self.tostring = tostring
-        except ImportError, err:
+        except ImportError as err:
             raise ImproperlyConfigured("Error while importing lxml: %s" % err)
-        except Exception, err:
-            raise ParserError("Error while initializing Parser: %s" % err)
+        except Exception as err:
+            raise ParserError("Error while initializing parser: %s" % err)
+
+        if not six.PY3:
+            # soupparser uses Beautiful Soup 3, which does not run on Python 3
+            try:
+                from lxml.html import soupparser
+            except ImportError as err:
+                soupparser = None
+            except Exception as err:
+                raise ParserError("Error while initializing parser: %s" % err)
+        else:
+            soupparser = None
+
+        self.soupparser = soupparser
+        self.fromstring = fromstring
+        self.tostring = tostring
         super(LxmlParser, self).__init__(content)
 
     @cached_property
     def tree(self):
+        """
+        Document tree parsed from the content wrapped in a <root> element.
+        """
         content = '<root>%s</root>' % self.content
         tree = self.fromstring(content)
         try:
-            self.tostring(tree, encoding=unicode)
+            self.tostring(tree, encoding=six.text_type)
         except UnicodeDecodeError:
-            tree = self.soupparser.fromstring(content)
+            if self.soupparser:  # use soup parser on python 2
+                tree = self.soupparser.fromstring(content)
+            else:  # raise an error on python 3
+                raise
         return tree
 
     def css_elems(self):
@@ -43,14 +67,14 @@ class LxmlParser(ParserBase):
         return elem.attrib
 
     def elem_content(self, elem):
-        return smart_unicode(elem.text)
+        return smart_text(elem.text)
 
     def elem_name(self, elem):
         return elem.tag
 
     def elem_str(self, elem):
-        elem_as_string = smart_unicode(
-            self.tostring(elem, method='html', encoding=unicode))
+        elem_as_string = smart_text(
+            self.tostring(elem, method='html', encoding=six.text_type))
         if elem.tag == 'link':
             # This makes testcases happy
             return elem_as_string.replace('>', ' />')