summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: scoder <stefan_ml@behnel.de> 2013-04-28 05:34:15 -0700
committer: scoder <stefan_ml@behnel.de> 2013-04-28 05:34:15 -0700
commit: 3e7c45c5bcad2c5ddcaf57ffc2cf1c186ba46093 (patch)
tree: f856674a1320ef6f0b129b7dbbbd5c76e89f746a
parent: df91a7354de4d77368f8102c0a438051a789ab0e (diff)
parent: 1d28e36ac4e848c6006673283bf9536a04ba3150 (diff)
download: python-lxml-3e7c45c5bcad2c5ddcaf57ffc2cf1c186ba46093.tar.gz
Merge pull request #120 from cko/relattr2
proper handling of 'rel' attribute for links (Bug 971754)
-rw-r--r--  src/lxml/html/clean.py  5
-rw-r--r--  src/lxml/html/tests/test_clean.txt  13
2 files changed, 17 insertions, 1 deletions
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
index dabc1257..e94eec26 100644
--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
@@ -399,7 +399,10 @@ class Cleaner(object):
if self.add_nofollow:
for el in _find_external_links(doc):
if not self.allow_follow(el):
- el.set('rel', 'nofollow')
+ rel = 'nofollow'
+ if el.get('rel'):
+ rel = el.get('rel') + ' ' + rel
+ el.set('rel', rel)
def allow_follow(self, anchor):
"""
diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt
index d87a6619..21bd12c6 100644
--- a/src/lxml/html/tests/test_clean.txt
+++ b/src/lxml/html/tests/test_clean.txt
@@ -27,6 +27,8 @@
... Password: <input type="password" name="password">
... </form>
... <a href="evil-site">spam spam SPAM!</a>
+... <a href="http://example.com" rel="author">Author</a>
+... <a href="http://example.com">Text</a>
... <img src="evil!">
... </body>
... </html>'''
@@ -57,6 +59,8 @@
Password: <input type="password" name="password">
</form>
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
@@ -87,6 +91,8 @@
Password: <input type="password" name="password">
</form>
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
@@ -105,6 +111,8 @@
of EVIL!
Password:
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
@@ -122,6 +130,8 @@
of EVIL!
Password:
<a href="evil-site" rel="nofollow">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author nofollow">Author</a>
+ <a href="http://example.com" rel="nofollow">Text</a>
<img src="evil!">
</body>
</html>
@@ -143,6 +153,9 @@
of EVIL!
Password:
<a href="evil-site">spam spam SPAM!</a>
+ <a href="http://example.com" rel="author">Author</a>
+ <a href="http://example.com">Text</a>
<img src="evil!">
</body>
</html>
+