summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author scoder <stefan_ml@behnel.de> 2013-04-27 05:17:06 -0700
committer scoder <stefan_ml@behnel.de> 2013-04-27 05:17:06 -0700
commit a790a62b4e07a6c17bad3e7dc9784050123b4a1f (patch)
tree 19db55dcf653deb7e7e00878739bfd1229eb0397
parent 49268d013d37ad77b87660c11d0cd8120b592681 (diff)
parent 2b0bdf759009abd954d4dfddb5f82ad1eecb085d (diff)
download python-lxml-a790a62b4e07a6c17bad3e7dc9784050123b4a1f.tar.gz
Merge pull request #115 from cko/Bug715687
fix for Bug #715687
-rw-r--r-- src/lxml/html/clean.py 3
-rw-r--r-- src/lxml/html/tests/test_clean.txt 30
2 files changed, 30 insertions, 3 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index 4a6912f0..2fa7a2de 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -312,7 +312,8 @@ class Cleaner(object):
for el in list(doc.iter('link')):
if 'stylesheet' in el.get('rel', '').lower():
# Note this kills alternate stylesheets as well
- el.drop_tree()
+ if not self.allow_element(el):
+ el.drop_tree()
if self.meta:
kill_tags.add('meta')
if self.page_structure:
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index a8e2959b..d87a6619 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -6,6 +6,8 @@
... <head>
... <script type="text/javascript" src="evil-site"></script>
... <link rel="alternate" type="text/rss" src="evil-rss">
+... <link rel="alternate" type="text/rss" href="http://example.com">
+... <link rel="stylesheet" type="text/rss" href="http://example.com">
... <style>
... body {background-image: url(javascript:do_evil)};
... div {background-image: url(data:text/html;base64,PHNjcmlwdD5hbGVydCgidGVzdCIpOzwvc2NyaXB0Pg==)};
@@ -34,6 +36,8 @@
<head>
<script type="text/javascript" src="evil-site"></script>
<link rel="alternate" type="text/rss" src="evil-rss">
+ <link rel="alternate" type="text/rss" href="http://example.com">
+ <link rel="stylesheet" type="text/rss" href="http://example.com">
<style>
body {background-image: url(javascript:do_evil)};
div {background-image: url(data:text/html;base64,PHNjcmlwdD5hbGVydCgidGVzdCIpOzwvc2NyaXB0Pg==)};
@@ -62,6 +66,8 @@
<head>
<script type="text/javascript" src="evil-site"></script>
<link rel="alternate" type="text/rss" src="evil-rss">
+ <link rel="alternate" type="text/rss" href="http://example.com">
+ <link rel="stylesheet" type="text/rss" href="http://example.com">
<style>
body {background-image: url(javascript:do_evil)};
div {background-image: url(data:text/html;base64,PHNjcmlwdD5hbGVydCgidGVzdCIpOzwvc2NyaXB0Pg==)};
@@ -103,8 +109,7 @@
</body>
</html>
->>> print(Cleaner(style=True, links=True, add_nofollow=True,
-... page_structure=False, safe_attrs_only=False).clean_html(doc))
+>>> print(Cleaner(style=True, links=True, add_nofollow=True, page_structure=False, safe_attrs_only=False).clean_html(doc))
<html>
<head>
</head>
@@ -120,3 +125,24 @@
<img src="evil!">
</body>
</html>
+
+>>> print(Cleaner(links=False, page_structure=False, javascript=True, host_whitelist=['example.com'], whitelist_tags=None).clean_html(doc))
+<html>
+ <head>
+ <link rel="alternate" type="text/rss" src="evil-rss">
+ <link rel="alternate" type="text/rss" href="http://example.com">
+ <link rel="stylesheet" type="text/rss" href="http://example.com">
+ <style>/* deleted */</style>
+ </head>
+ <body>
+ <a href="">a link</a>
+ <a href="">data</a>
+ <a href="#">another link</a>
+ <p>a paragraph</p>
+ <div>secret EVIL!</div>
+ of EVIL!
+ Password:
+ <a href="evil-site">spam spam SPAM!</a>
+ <img src="evil!">
+ </body>
+</html>