summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christine Koppelt <ch.ko123@googlemail.com> 2013-04-27 18:09:53 +0200
committer Christine Koppelt <ch.ko123@googlemail.com> 2013-04-27 18:09:53 +0200
commit 1d28e36ac4e848c6006673283bf9536a04ba3150 (patch)
tree ee550122ca094ad2b81d3476352f2f3ff06f43e5
parent 7698fac30c6f859a1150a79a67540fe3b3fd77d1 (diff)
download python-lxml-1d28e36ac4e848c6006673283bf9536a04ba3150.tar.gz
proper handling of 'rel' attribute for links (Bug 971754)
-rw-r--r-- src/lxml/html/clean.py 5
-rw-r--r-- src/lxml/html/tests/test_clean.txt 13
2 files changed, 17 insertions, 1 deletion
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index dabc1257..e94eec26 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -399,7 +399,10 @@ class Cleaner(object):
if self.add_nofollow:
for el in _find_external_links(doc):
if not self.allow_follow(el):
- el.set('rel', 'nofollow')
+ rel = 'nofollow'
+ if el.get('rel'):
+ rel = el.get('rel') + ' ' + rel
+ el.set('rel', rel)
def allow_follow(self, anchor):
"""
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index d87a6619..21bd12c6 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -27,6 +27,8 @@
... Password: <input type="password" name="password">
... </form>
... <a href="evil-site">spam spam SPAM!</a>
+... <a href="http://example.com" rel="author">Author</a>
+... <a href="http://example.com">Text</a>
... <img src="evil!">
... </body>
... </html>'''
@@ -57,6 +59,8 @@
Password: <input type="password" name="password">
</form>
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
@@ -87,6 +91,8 @@
Password: <input type="password" name="password">
</form>
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
@@ -105,6 +111,8 @@
of EVIL!
Password:
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
@@ -122,6 +130,8 @@
of EVIL!
Password:
<a href="evil-site" rel="nofollow">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author nofollow">Author</a>
+ <a href="http://example.com" rel="nofollow">Text</a>
<img src="evil!">
</body>
</html>
@@ -143,6 +153,9 @@
of EVIL!
Password:
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
+