Merge branch 'release/1.4' (tag: 1.4)

author: Mathieu Pillard <m@virgule.net> 2014-05-20 00:27:54 +0200
committer: Mathieu Pillard <m@virgule.net> 2014-05-20 00:27:54 +0200
commit: e747dce3d7e04fe595bbfed54f9554c2725eb757 (patch)
tree: f2c2e310e48062893f8224e08364bbff043afd84 /compressor/parser
parent: 772ecd1ef2ce021d05cbc44eb8602f7d59db2c52 (diff)
parent: 804c302495bd9d043f830ed012c76183eb5a1e2d (diff)
download: django-compressor-1.4.tar.gz
5 files changed, 87 insertions, 48 deletions
diff --git a/compressor/parser/__init__.py b/compressor/parser/__init__.py
index bc8c18c..a3fe78f 100644
--- a/compressor/parser/__init__.py
+++ b/compressor/parser/__init__.py
@@ -1,3 +1,4 @@
+from django.utils import six
 from django.utils.functional import LazyObject
 from django.utils.importlib import import_module
 
@@ -11,8 +12,9 @@ from compressor.parser.html5lib import Html5LibParser  # noqa
 
 class AutoSelectParser(LazyObject):
     options = (
-        ('lxml.html', LxmlParser),   # lxml, extremely fast
-        ('HTMLParser', HtmlParser),  # fast and part of the Python stdlib
+        # TODO: make lxml.html parser first again
+        (six.moves.html_parser.__name__, HtmlParser),  # fast and part of the Python stdlib
+        ('lxml.html', LxmlParser),  # lxml, extremely fast
     )
 
     def __init__(self, content):
diff --git a/compressor/parser/beautifulsoup.py b/compressor/parser/beautifulsoup.py
index 498cde8..d143df4 100644
--- a/compressor/parser/beautifulsoup.py
+++ b/compressor/parser/beautifulsoup.py
@@ -1,6 +1,7 @@
 from __future__ import absolute_import
 from django.core.exceptions import ImproperlyConfigured
-from django.utils.encoding import smart_unicode
+from django.utils import six
+from django.utils.encoding import smart_text
 
 from compressor.exceptions import ParserError
 from compressor.parser import ParserBase
@@ -12,18 +13,27 @@ class BeautifulSoupParser(ParserBase):
     @cached_property
     def soup(self):
         try:
-            from BeautifulSoup import BeautifulSoup
+            if six.PY3:
+                from bs4 import BeautifulSoup
+            else:
+                from BeautifulSoup import BeautifulSoup
             return BeautifulSoup(self.content)
-        except ImportError, err:
+        except ImportError as err:
             raise ImproperlyConfigured("Error while importing BeautifulSoup: %s" % err)
-        except Exception, err:
+        except Exception as err:
             raise ParserError("Error while initializing Parser: %s" % err)
 
     def css_elems(self):
-        return self.soup.findAll({'link': True, 'style': True})
+        if six.PY3:
+            return self.soup.find_all({'link': True, 'style': True})
+        else:
+            return self.soup.findAll({'link': True, 'style': True})
 
     def js_elems(self):
-        return self.soup.findAll('script')
+        if six.PY3:
+            return self.soup.find_all('script')
+        else:
+            return self.soup.findAll('script')
 
     def elem_attribs(self, elem):
         return dict(elem.attrs)
@@ -35,4 +45,4 @@ class BeautifulSoupParser(ParserBase):
         return elem.name
 
     def elem_str(self, elem):
-        return smart_unicode(elem)
+        return smart_text(elem)
diff --git a/compressor/parser/default_htmlparser.py b/compressor/parser/default_htmlparser.py
index 8425d77..80272cb 100644
--- a/compressor/parser/default_htmlparser.py
+++ b/compressor/parser/default_htmlparser.py
@@ -1,13 +1,13 @@
-from HTMLParser import HTMLParser
-from django.utils.encoding import smart_unicode
+from django.utils import six
+from django.utils.encoding import smart_text
+
 from compressor.exceptions import ParserError
 from compressor.parser import ParserBase
 
 
-class DefaultHtmlParser(ParserBase, HTMLParser):
-
+class DefaultHtmlParser(ParserBase, six.moves.html_parser.HTMLParser):
     def __init__(self, content):
-        HTMLParser.__init__(self)
+        six.moves.html_parser.HTMLParser.__init__(self)
         self.content = content
         self._css_elems = []
         self._js_elems = []
@@ -15,7 +15,7 @@ class DefaultHtmlParser(ParserBase, HTMLParser):
         try:
             self.feed(self.content)
             self.close()
-        except Exception, err:
+        except Exception as err:
             lineno = err.lineno
             line = self.content.splitlines()[lineno]
             raise ParserError("Error while initializing HtmlParser: %s (line: %s)" % (err, repr(line)))
@@ -65,7 +65,7 @@ class DefaultHtmlParser(ParserBase, HTMLParser):
         return elem['attrs_dict']
 
     def elem_content(self, elem):
-        return smart_unicode(elem['text'])
+        return smart_text(elem['text'])
 
     def elem_str(self, elem):
         tag = {}
diff --git a/compressor/parser/html5lib.py b/compressor/parser/html5lib.py
index 7fee590..b1d0948 100644
--- a/compressor/parser/html5lib.py
+++ b/compressor/parser/html5lib.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import
-from django.utils.encoding import smart_unicode
 from django.core.exceptions import ImproperlyConfigured
+from django.utils.encoding import smart_text
 
 from compressor.exceptions import ParserError
 from compressor.parser import ParserBase
@@ -15,42 +15,45 @@ class Html5LibParser(ParserBase):
         self.html5lib = html5lib
 
     def _serialize(self, elem):
-        fragment = self.html5lib.treebuilders.simpletree.DocumentFragment()
-        fragment.appendChild(elem)
-        return self.html5lib.serialize(fragment,
-            quote_attr_values=True, omit_optional_tags=False)
+        return self.html5lib.serialize(
+            elem, tree="etree", quote_attr_values=True,
+            omit_optional_tags=False, use_trailing_solidus=True,
+        )
 
     def _find(self, *names):
-        for node in self.html.childNodes:
-            if node.type == 5 and node.name in names:
-                yield node
+        for elem in self.html:
+            if elem.tag in names:
+                yield elem
 
     @cached_property
     def html(self):
         try:
-            return self.html5lib.parseFragment(self.content)
-        except ImportError, err:
+            return self.html5lib.parseFragment(self.content, treebuilder="etree")
+        except ImportError as err:
             raise ImproperlyConfigured("Error while importing html5lib: %s" % err)
-        except Exception, err:
+        except Exception as err:
             raise ParserError("Error while initializing Parser: %s" % err)
 
     def css_elems(self):
-        return self._find('style', 'link')
+        return self._find('{http://www.w3.org/1999/xhtml}link',
+                          '{http://www.w3.org/1999/xhtml}style')
 
     def js_elems(self):
-        return self._find('script')
+        return self._find('{http://www.w3.org/1999/xhtml}script')
 
     def elem_attribs(self, elem):
-        return elem.attributes
+        return elem.attrib
 
     def elem_content(self, elem):
-        return elem.childNodes[0].value
+        return smart_text(elem.text)
 
     def elem_name(self, elem):
-        return elem.name
+        if '}' in elem.tag:
+            return elem.tag.split('}')[1]
+        return elem.tag
 
     def elem_str(self, elem):
         # This method serializes HTML in a way that does not pass all tests.
         # However, this method is only called in tests anyway, so it doesn't
         # really matter.
-        return smart_unicode(self._serialize(elem))
+        return smart_text(self._serialize(elem))
diff --git a/compressor/parser/lxml.py b/compressor/parser/lxml.py
index 7bbb561..64a8fcb 100644
--- a/compressor/parser/lxml.py
+++ b/compressor/parser/lxml.py
@@ -1,6 +1,8 @@
-from __future__ import absolute_import
+from __future__ import absolute_import, unicode_literals
+
 from django.core.exceptions import ImproperlyConfigured
-from django.utils.encoding import smart_unicode
+from django.utils import six
+from django.utils.encoding import smart_text
 
 from compressor.exceptions import ParserError
 from compressor.parser import ParserBase
@@ -8,28 +10,50 @@ from compressor.utils.decorators import cached_property
 
 
 class LxmlParser(ParserBase):
-
+    """
+    LxmlParser will use `lxml.html` parser to parse rendered contents of
+    {% compress %} tag. Under python 2 it will also try to use beautiful
+    soup parser in case of any problems with encoding.
+    """
     def __init__(self, content):
         try:
-            from lxml.html import fromstring, soupparser
+            from lxml.html import fromstring
             from lxml.etree import tostring
-            self.fromstring = fromstring
-            self.soupparser = soupparser
-            self.tostring = tostring
-        except ImportError, err:
+        except ImportError as err:
             raise ImproperlyConfigured("Error while importing lxml: %s" % err)
-        except Exception, err:
-            raise ParserError("Error while initializing Parser: %s" % err)
+        except Exception as err:
+            raise ParserError("Error while initializing parser: %s" % err)
+
+        if not six.PY3:
+            # soupparser uses Beautiful Soup 3 which does not run on python 3.x
+            try:
+                from lxml.html import soupparser
+            except ImportError as err:
+                soupparser = None
+            except Exception as err:
+                raise ParserError("Error while initializing parser: %s" % err)
+        else:
+            soupparser = None
+
+        self.soupparser = soupparser
+        self.fromstring = fromstring
+        self.tostring = tostring
         super(LxmlParser, self).__init__(content)
 
     @cached_property
     def tree(self):
+        """
+        Document tree.
+        """
         content = '<root>%s</root>' % self.content
         tree = self.fromstring(content)
         try:
-            self.tostring(tree, encoding=unicode)
+            self.tostring(tree, encoding=six.text_type)
         except UnicodeDecodeError:
-            tree = self.soupparser.fromstring(content)
+            if self.soupparser:  # use soup parser on python 2
+                tree = self.soupparser.fromstring(content)
+            else:  # raise an error on python 3
+                raise
         return tree
 
     def css_elems(self):
@@ -43,14 +67,14 @@ class LxmlParser(ParserBase):
         return elem.attrib
 
     def elem_content(self, elem):
-        return smart_unicode(elem.text)
+        return smart_text(elem.text)
 
     def elem_name(self, elem):
         return elem.tag
 
     def elem_str(self, elem):
-        elem_as_string = smart_unicode(
-            self.tostring(elem, method='html', encoding=unicode))
+        elem_as_string = smart_text(
+            self.tostring(elem, method='html', encoding=six.text_type))
         if elem.tag == 'link':
             # This makes testcases happy
             return elem_as_string.replace('>', ' />')
author	Mathieu Pillard <m@virgule.net>	2014-05-20 00:27:54 +0200
committer	Mathieu Pillard <m@virgule.net>	2014-05-20 00:27:54 +0200
commit	e747dce3d7e04fe595bbfed54f9554c2725eb757 (patch)
tree	f2c2e310e48062893f8224e08364bbff043afd84 /compressor/parser
parent	772ecd1ef2ce021d05cbc44eb8602f7d59db2c52 (diff)
parent	804c302495bd9d043f830ed012c76183eb5a1e2d (diff)
download	django-compressor-1.4.tar.gz