diff options
author | scoder <stefan_ml@behnel.de> | 2013-04-28 05:34:15 -0700 |
---|---|---|
committer | scoder <stefan_ml@behnel.de> | 2013-04-28 05:34:15 -0700 |
commit | 3e7c45c5bcad2c5ddcaf57ffc2cf1c186ba46093 (patch) | |
tree | f856674a1320ef6f0b129b7dbbbd5c76e89f746a | |
parent | df91a7354de4d77368f8102c0a438051a789ab0e (diff) | |
parent | 1d28e36ac4e848c6006673283bf9536a04ba3150 (diff) | |
download | python-lxml-3e7c45c5bcad2c5ddcaf57ffc2cf1c186ba46093.tar.gz |
Merge pull request #120 from cko/relattr2
proper handling of 'rel' attribute for links (Bug 971754)
-rw-r--r-- | src/lxml/html/clean.py | 5 | ||||
-rw-r--r-- | src/lxml/html/tests/test_clean.txt | 13 |
2 files changed, 17 insertions, 1 deletion
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py index dabc1257..e94eec26 100644 --- a/src/lxml/html/clean.py +++ b/src/lxml/html/clean.py @@ -399,7 +399,10 @@ class Cleaner(object): if self.add_nofollow: for el in _find_external_links(doc): if not self.allow_follow(el): - el.set('rel', 'nofollow') + rel = 'nofollow' + if el.get('rel'): + rel = el.get('rel') + ' ' + rel + el.set('rel', rel) def allow_follow(self, anchor): """ diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt index d87a6619..21bd12c6 100644 --- a/src/lxml/html/tests/test_clean.txt +++ b/src/lxml/html/tests/test_clean.txt @@ -27,6 +27,8 @@ ... Password: <input type="password" name="password"> ... </form> ... <a href="evil-site">spam spam SPAM!</a> +... <a href="http://example.com" rel="author">Author</a> +... <a href="http://example.com">Text</a> ... <img src="evil!"> ... </body> ... </html>''' @@ -57,6 +59,8 @@ Password: <input type="password" name="password"> </form> <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> @@ -87,6 +91,8 @@ Password: <input type="password" name="password"> </form> <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> @@ -105,6 +111,8 @@ of EVIL! Password: <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> @@ -122,6 +130,8 @@ of EVIL! Password: <a href="evil-site" rel="nofollow">spam spam SPAM!</a> + <a href="http://example.com" rel="author nofollow">Author</a> + <a href="http://example.com" rel="nofollow">Text</a> <img src="evil!"> </body> </html> @@ -143,6 +153,9 @@ of EVIL! 
Password: <a href="evil-site">spam spam SPAM!</a> + <a href="http://example.com" rel="author">Author</a> + <a href="http://example.com">Text</a> <img src="evil!"> </body> </html> + |