summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2015-05-27 20:26:44 +0000
committerDouwe Maan <douwe@gitlab.com>2015-05-27 20:26:44 +0000
commit13f2ab3e3dabae9fe5013fc6d32d246b534df991 (patch)
treecef2d5cecb4f2682de18eb379e4bea0d76087239
parentc843e092f309d22281205e34658221f17642ea33 (diff)
parent7424d2fa5bd52b7f41ec4359bf80b1649b59706b (diff)
downloadgitlab-ce-13f2ab3e3dabae9fe5013fc6d32d246b534df991.tar.gz
Merge branch 'rs-dont-follow-me' into 'master'
Add ExternalLinkFilter to Markdown pipeline Forces a `rel="nofollow"` attribute on all external links. Addresses internal https://dev.gitlab.org/gitlab/gitlabhq/issues/2314#note_46525 See merge request !727
-rw-r--r--lib/gitlab/markdown.rb2
-rw-r--r--lib/gitlab/markdown/external_link_filter.rb33
-rw-r--r--spec/features/markdown_spec.rb14
-rw-r--r--spec/fixtures/markdown.md.erb9
-rw-r--r--spec/lib/gitlab/markdown/external_link_filter_spec.rb33
5 files changed, 89 insertions, 2 deletions
diff --git a/lib/gitlab/markdown.rb b/lib/gitlab/markdown.rb
index c0fb22e7f36..5db1566f55d 100644
--- a/lib/gitlab/markdown.rb
+++ b/lib/gitlab/markdown.rb
@@ -11,6 +11,7 @@ module Gitlab
autoload :CommitReferenceFilter, 'gitlab/markdown/commit_reference_filter'
autoload :EmojiFilter, 'gitlab/markdown/emoji_filter'
autoload :ExternalIssueReferenceFilter, 'gitlab/markdown/external_issue_reference_filter'
+ autoload :ExternalLinkFilter, 'gitlab/markdown/external_link_filter'
autoload :IssueReferenceFilter, 'gitlab/markdown/issue_reference_filter'
autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter'
autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter'
@@ -103,6 +104,7 @@ module Gitlab
Gitlab::Markdown::EmojiFilter,
Gitlab::Markdown::TableOfContentsFilter,
Gitlab::Markdown::AutolinkFilter,
+ Gitlab::Markdown::ExternalLinkFilter,
Gitlab::Markdown::UserReferenceFilter,
Gitlab::Markdown::IssueReferenceFilter,
diff --git a/lib/gitlab/markdown/external_link_filter.rb b/lib/gitlab/markdown/external_link_filter.rb
new file mode 100644
index 00000000000..c539e0fb823
--- /dev/null
+++ b/lib/gitlab/markdown/external_link_filter.rb
@@ -0,0 +1,33 @@
+require 'html/pipeline/filter'
+
+module Gitlab
+ module Markdown
+ # HTML Filter to add a `rel="nofollow"` attribute to external links
+ #
+ class ExternalLinkFilter < HTML::Pipeline::Filter
+ def call
+ doc.search('a').each do |node|
+ next unless node.has_attribute?('href')
+
+ link = node.attribute('href').value
+
+ # Skip non-HTTP(S) links
+ next unless link.start_with?('http')
+
+ # Skip internal links
+ next if link.start_with?(internal_url)
+
+ node.set_attribute('rel', 'nofollow')
+ end
+
+ doc
+ end
+
+ private
+
+ def internal_url
+ @internal_url ||= Gitlab.config.gitlab.url
+ end
+ end
+ end
+end
diff --git a/spec/features/markdown_spec.rb b/spec/features/markdown_spec.rb
index d6954174660..ee1b3bf749d 100644
--- a/spec/features/markdown_spec.rb
+++ b/spec/features/markdown_spec.rb
@@ -149,7 +149,7 @@ describe 'GitLab Markdown' do
it 'removes `rel` attribute from links' do
body = get_section('sanitizationfilter')
- expect(body).not_to have_selector('a[rel]')
+ expect(body).not_to have_selector('a[rel="bookmark"]')
end
it "removes `href` from `a` elements if it's fishy" do
@@ -237,6 +237,18 @@ describe 'GitLab Markdown' do
end
end
+ describe 'ExternalLinkFilter' do
+ let(:links) { get_section('externallinkfilter').next_element }
+
+ it 'adds nofollow to external link' do
+ expect(links.css('a').first.to_html).to match 'nofollow'
+ end
+
+ it 'ignores internal link' do
+ expect(links.css('a').last.to_html).not_to match 'nofollow'
+ end
+ end
+
describe 'ReferenceFilter' do
it 'handles references in headers' do
header = @doc.at_css('#reference-filters-eg-1').parent
diff --git a/spec/fixtures/markdown.md.erb b/spec/fixtures/markdown.md.erb
index 26fc4e38e5a..02ab46c905a 100644
--- a/spec/fixtures/markdown.md.erb
+++ b/spec/fixtures/markdown.md.erb
@@ -79,7 +79,7 @@ As permissive as it is, we've allowed even more stuff:
<span>span tag</span>
-<a href="#" rel="nofollow">This is a link with a defined rel attribute, which should be removed</a>
+<a href="#" rel="bookmark">This is a link with a defined rel attribute, which should be removed</a>
<a href="javascript:alert('Hi')">This is a link trying to be sneaky. It gets its link removed entirely.</a>
@@ -127,6 +127,13 @@ But it shouldn't autolink text inside certain tags:
- <a>http://about.gitlab.com/</a>
- <kbd>http://about.gitlab.com/</kbd>
+### ExternalLinkFilter
+
+External links get a `rel="nofollow"` attribute:
+
+- [Google](https://google.com/)
+- [GitLab Root](<%= Gitlab.config.gitlab.url %>)
+
### Reference Filters (e.g., <%= issue.to_reference %>)
References should be parseable even inside _<%= merge_request.to_reference %>_ emphasis.
diff --git a/spec/lib/gitlab/markdown/external_link_filter_spec.rb b/spec/lib/gitlab/markdown/external_link_filter_spec.rb
new file mode 100644
index 00000000000..c2ff4f80a42
--- /dev/null
+++ b/spec/lib/gitlab/markdown/external_link_filter_spec.rb
@@ -0,0 +1,33 @@
+require 'spec_helper'
+
+module Gitlab::Markdown
+ describe ExternalLinkFilter do
+ def filter(html, options = {})
+ described_class.call(html, options)
+ end
+
+ it 'ignores elements without an href attribute' do
+ exp = act = %q(<a id="ignored">Ignore Me</a>)
+ expect(filter(act).to_html).to eq exp
+ end
+
+ it 'ignores non-HTTP(S) links' do
+ exp = act = %q(<a href="irc://irc.freenode.net/gitlab">IRC</a>)
+ expect(filter(act).to_html).to eq exp
+ end
+
+ it 'skips internal links' do
+ internal = Gitlab.config.gitlab.url
+ exp = act = %Q(<a href="#{internal}/sign_in">Login</a>)
+ expect(filter(act).to_html).to eq exp
+ end
+
+ it 'adds rel="nofollow" to external links' do
+ act = %q(<a href="https://google.com/">Google</a>)
+ doc = filter(act)
+
+ expect(doc.at_css('a')).to have_attribute('rel')
+ expect(doc.at_css('a')['rel']).to eq 'nofollow'
+ end
+ end
+end