diff options
author | Christine Koppelt <ch.ko123@googlemail.com> | 2013-04-27 18:09:53 +0200 |
---|---|---|
committer | Christine Koppelt <ch.ko123@googlemail.com> | 2013-04-27 18:09:53 +0200 |
commit | 1d28e36ac4e848c6006673283bf9536a04ba3150 (patch) | |
tree | ee550122ca094ad2b81d3476352f2f3ff06f43e5 | |
parent | 7698fac30c6f859a1150a79a67540fe3b3fd77d1 (diff) | |
download | python-lxml-1d28e36ac4e848c6006673283bf9536a04ba3150.tar.gz |
Proper handling of the 'rel' attribute for links: append 'nofollow' to an existing 'rel' value instead of overwriting it (Bug 971754)
-rw-r--r-- | src/lxml/html/clean.py | 5 | ||||
-rw-r--r-- | src/lxml/html/tests/test_clean.txt | 13 |
2 files changed, 17 insertions, 1 deletion
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py index dabc1257..e94eec26 100644 --- a/src/lxml/html/clean.py +++ b/src/lxml/html/clean.py @@ -399,7 +399,10 @@ class Cleaner(object): if self.add_nofollow: for el in _find_external_links(doc): if not self.allow_follow(el): - el.set('rel', 'nofollow') + rel = 'nofollow' + if el.get('rel'): + rel = el.get('rel') + ' ' + rel + el.set('rel', rel) def allow_follow(self, anchor): """ diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt index d87a6619..21bd12c6 100644 --- a/src/lxml/html/tests/test_clean.txt +++ b/src/lxml/html/tests/test_clean.txt @@ -27,6 +27,8 @@ ... Password: <input type="password" name="password"> ... </form> ... <a href="evil-site">spam spam SPAM!</a> +... <a href="http://example.com" rel="author">Author</a> +... <a href="http://example.com">Text</a> ... <img src="evil!"> ... </body> ... </html>''' @@ -57,6 +59,8 @@ Password: <input type="password" name="password"> </form> <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> @@ -87,6 +91,8 @@ Password: <input type="password" name="password"> </form> <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> @@ -105,6 +111,8 @@ of EVIL! Password: <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> @@ -122,6 +130,8 @@ of EVIL! Password: <a href="evil-site" rel="nofollow">spam spam SPAM!</a> + <a href="http://example.com" rel="author nofollow">Author</a> + <a href="http://example.com" rel="nofollow">Text</a> <img src="evil!"> </body> </html> @@ -143,6 +153,9 @@ of EVIL! 
Password: <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> + |