From 2b0bdf759009abd954d4dfddb5f82ad1eecb085d Mon Sep 17 00:00:00 2001
From: Christine Koppelt
Date: Sat, 27 Apr 2013 12:22:16 +0200
Subject: fix for Bug #715687 (Consider host_whitelist and whitelist_tags before deleting element)

---
 src/lxml/html/clean.py             |  3 ++-
 src/lxml/html/tests/test_clean.txt | 30 ++++++++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index 4a6912f0..2fa7a2de 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -312,7 +312,8 @@ class Cleaner(object):
             for el in list(doc.iter('link')):
                 if 'stylesheet' in el.get('rel', '').lower():
                     # Note this kills alternate stylesheets as well
-                    el.drop_tree()
+                    if not self.allow_element(el):
+                        el.drop_tree()
         if self.meta:
             kill_tags.add('meta')
         if self.page_structure:
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index a8e2959b..d87a6619 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -6,6 +6,8 @@
... ... ... +... +... ... + + + a link + data + another link +

a paragraph

+
secret EVIL!
+ of EVIL! + Password: + spam spam SPAM! + + +
-- 
cgit v1.2.1