From 70bbf093aa07d416ea33da24ab015e5d22c0d501 Mon Sep 17 00:00:00 2001 From: Robert Speicher Date: Fri, 8 May 2015 12:17:54 -0400 Subject: Remove class and id attributes from SanitizationFilter whitelist --- doc/markdown/markdown.md | 2 +- lib/gitlab/markdown/sanitization_filter.rb | 19 +++++++++++++-- spec/features/markdown_spec.rb | 28 +++++++++++----------- spec/fixtures/markdown.md.erb | 26 ++++++++++---------- .../gitlab/markdown/sanitization_filter_spec.rb | 20 ++++++++++++---- 5 files changed, 59 insertions(+), 36 deletions(-) diff --git a/doc/markdown/markdown.md b/doc/markdown/markdown.md index e95ddbb7578..30c29084e34 100644 --- a/doc/markdown/markdown.md +++ b/doc/markdown/markdown.md @@ -423,7 +423,7 @@ Quote break. You can also use raw HTML in your Markdown, and it'll mostly work pretty well. -See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements, as well as the `class`, and `id` attributes on all elements. +See the documentation for HTML::Pipeline's [SanitizationFilter](http://www.rubydoc.info/gems/html-pipeline/HTML/Pipeline/SanitizationFilter#WHITELIST-constant) class for the list of allowed HTML tags and attributes. In addition to the default `SanitizationFilter` whitelist, GitLab allows `span` elements. ```no-highlight
diff --git a/lib/gitlab/markdown/sanitization_filter.rb b/lib/gitlab/markdown/sanitization_filter.rb index 9a154e0b2fe..6f33155badf 100644 --- a/lib/gitlab/markdown/sanitization_filter.rb +++ b/lib/gitlab/markdown/sanitization_filter.rb @@ -10,8 +10,9 @@ module Gitlab def whitelist whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST - # Allow `class` and `id` on all elements - whitelist[:attributes][:all].push('class', 'id') + # Allow code highlighting + whitelist[:attributes]['pre'] = %w(class) + whitelist[:attributes]['span'] = %w(class) # Allow table alignment whitelist[:attributes]['th'] = %w(style) @@ -23,6 +24,9 @@ module Gitlab # Remove `rel` attribute from `a` elements whitelist[:transformers].push(remove_rel) + # Remove `class` attribute from non-highlight spans + whitelist[:transformers].push(clean_spans) + whitelist end @@ -33,6 +37,17 @@ module Gitlab end end end + + def clean_spans + lambda do |env| + return unless env[:node_name] == 'span' + return unless env[:node].has_attribute?('class') + + unless has_ancestor?(env[:node], 'pre') + env[:node].remove_attribute('class') + end + end + end end end end diff --git a/spec/features/markdown_spec.rb b/spec/features/markdown_spec.rb index 1746ce128e4..8f3dfc8d5a9 100644 --- a/spec/features/markdown_spec.rb +++ b/spec/features/markdown_spec.rb @@ -60,8 +60,8 @@ describe 'GitLab Markdown' do @feat.teardown end - # Given a header ID, goes to that element's parent (the header), then to its - # second sibling (the body). + # Given a header ID, goes to that element's parent (the header itself), then + # its next sibling element (the body). def get_section(id) @doc.at_css("##{id}").parent.next_element end @@ -119,18 +119,18 @@ describe 'GitLab Markdown' do describe 'HTML::Pipeline' do describe 'SanitizationFilter' do it 'uses a permissive whitelist' do - expect(@doc).to have_selector('b#manual-b') - expect(@doc).to have_selector('em#manual-em') - expect(@doc).to have_selector("code#manual-code") + expect(@doc).to have_selector('b:contains("b tag")') + expect(@doc).to have_selector('em:contains("em tag")') + expect(@doc).to have_selector('code:contains("code tag")') expect(@doc).to have_selector('kbd:contains("s")') expect(@doc).to have_selector('strike:contains(Emoji)') - expect(@doc).to have_selector('img#manual-img') - expect(@doc).to have_selector('br#manual-br') - expect(@doc).to have_selector('hr#manual-hr') + expect(@doc).to have_selector('img[src*="smile.png"]') + expect(@doc).to have_selector('br') + expect(@doc).to have_selector('hr') end it 'permits span elements' do - expect(@doc).to have_selector('span#span-class-light.light') + expect(@doc).to have_selector('span:contains("span tag")') end it 'permits table alignment' do @@ -144,13 +144,12 @@ describe 'GitLab Markdown' do end it 'removes `rel` attribute from links' do - expect(@doc).to have_selector('a#a-rel-nofollow') - expect(@doc).not_to have_selector('a#a-rel-nofollow[rel]') + body = get_section('sanitizationfilter') + expect(body).not_to have_selector('a[rel]') end it "removes `href` from `a` elements if it's fishy" do - expect(@doc).to have_selector('a#a-href-javascript') - expect(@doc).not_to have_selector('a#a-href-javascript[href]') + expect(@doc).not_to have_selector('a[href*="javascript"]') end end @@ -228,7 +227,8 @@ describe 'GitLab Markdown' do %w(code a kbd).each do |elem| it "ignores links inside '#{elem}' element" do - expect(@doc.at_css("#{elem}#autolink-#{elem}").child).to be_text + body = get_section('autolinkfilter') + expect(body).not_to have_selector("#{elem} a") end end end diff --git a/spec/fixtures/markdown.md.erb b/spec/fixtures/markdown.md.erb index bc023ecf793..64817ec6700 100644 --- a/spec/fixtures/markdown.md.erb +++ b/spec/fixtures/markdown.md.erb @@ -54,36 +54,34 @@ After the Markdown has been turned into HTML, it gets passed through... ### SanitizationFilter -GitLab uses HTML::Pipeline::SanitizationFilter +GitLab uses HTML::Pipeline::SanitizationFilter to sanitize the generated HTML, stripping dangerous or unwanted tags. Its default whitelist is pretty permissive. Check it: -This text is bold and this text is emphasized. +b tag and em tag. -echo "Hello, world!" +code tag Press s to search. -Emoji Plain old images! +Emoji Plain old images! Here comes a line break: -
+
And a horizontal rule: -
+
As permissive as it is, we've allowed even more stuff: -Span elements +span tag -This is a link with a defined rel attribute, which should be removed +This is a link with a defined rel attribute, which should be removed -This is a link trying to be sneaky. It gets its link removed entirely. +This is a link trying to be sneaky. It gets its link removed entirely. ### Escaping @@ -125,9 +123,9 @@ These are all plain text that should get turned into links: But it shouldn't autolink text inside certain tags: -- http://about.gitlab.com/ -- http://about.gitlab.com/ -- http://about.gitlab.com/ +- http://about.gitlab.com/ +- http://about.gitlab.com/ +- http://about.gitlab.com/ ### Reference Filters (e.g., #<%= issue.iid %>) diff --git a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb index ab909a68635..4a1aa766149 100644 --- a/spec/lib/gitlab/markdown/sanitization_filter_spec.rb +++ b/spec/lib/gitlab/markdown/sanitization_filter_spec.rb @@ -29,17 +29,27 @@ module Gitlab::Markdown exp = act = "
\n
Term
\n
Definition
\n
" expect(filter(act).to_html).to eq exp end + + it 'sanitizes `class` attribute on any element' do + act = %q{Strong} + expect(filter(act).to_html).to eq %q{Strong} + end + + it 'sanitizes `id` attribute on any element' do + act = %q{Emphasis} + expect(filter(act).to_html).to eq %q{Emphasis} + end end describe 'custom whitelist' do - it 'allows `class` attribute on any element' do - exp = act = %q{Strong} + it 'allows syntax highlighting' do + exp = act = %q{
def
} expect(filter(act).to_html).to eq exp end - it 'allows `id` attribute on any element' do - exp = act = %q{Emphasis} - expect(filter(act).to_html).to eq exp + it 'sanitizes `class` attribute from non-highlight spans' do + act = %q{def} + expect(filter(act).to_html).to eq %q{def} end it 'allows `style` attribute on table elements' do -- cgit v1.2.1