diff options
author | Douwe Maan <douwe@gitlab.com> | 2018-03-05 15:39:36 +0000 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2018-03-05 15:39:36 +0000 |
commit | 089b2fb9fc4aace5dca759eec87c980c780192d5 (patch) | |
tree | 5659343c54fced2149aa252b75c62b82e6f16641 /lib/banzai | |
parent | fa1debefe7feb7980863aaa383d5a02e488b9a94 (diff) | |
parent | cb55bc3c0770adf7122d7cf49b12cb45c43de7ec (diff) | |
download | gitlab-ce-089b2fb9fc4aace5dca759eec87c980c780192d5.tar.gz |
Merge branch '41719-mr-title-fix' into 'master'
Render htmlentities correctly for links not supported by Rinku
Closes #41719
See merge request gitlab-org/gitlab-ce!17180
Diffstat (limited to 'lib/banzai')
-rw-r--r-- | lib/banzai/filter/autolink_filter.rb | 86 |
1 files changed, 40 insertions, 46 deletions
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index b8d2673c1a6..75b64ae9af2 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -25,8 +25,8 @@ module Banzai # period or comma for punctuation without those characters being included # in the generated link. # - # Rubular: http://rubular.com/r/cxjPyZc7Sb - LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)} + # Rubular: http://rubular.com/r/JzPhi6DCZp + LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)} # Text matching LINK_PATTERN inside these elements will not be linked IGNORE_PARENTS = %w(a code kbd pre script style).to_set @@ -35,53 +35,19 @@ module Banzai TEXT_QUERY = %Q(descendant-or-self::text()[ not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')}) and contains(., '://') - and not(starts-with(., 'http')) - and not(starts-with(., 'ftp')) ]).freeze + PUNCTUATION_PAIRS = { + "'" => "'", + '"' => '"', + ')' => '(', + ']' => '[', + '}' => '{' + }.freeze + def call return doc if context[:autolink] == false - rinku_parse - text_parse - end - - private - - # Run the text through Rinku as a first pass - # - # This will quickly autolink http(s) and ftp links. - # - # `@doc` will be re-parsed with the HTML String from Rinku. - def rinku_parse - # Convert the options from a Hash to a String that Rinku expects - options = tag_options(link_options) - - # NOTE: We don't parse email links because it will erroneously match - # external Commit and CommitRange references. - # - # The final argument tells Rinku to link short URLs that don't include a - # period (e.g., http://localhost:3000/) - rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1) - - return if rinku == html - - # Rinku returns a String, so parse it back to a Nokogiri::XML::Document - # for further processing. - @doc = parse_html(rinku) - end - - # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme - def contains_unsafe?(scheme) - return false unless scheme - - scheme = scheme.strip.downcase - Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } - end - - # Autolinks any text matching LINK_PATTERN that Rinku didn't already - # replace - def text_parse doc.xpath(TEXT_QUERY).each do |node| content = node.to_html @@ -97,6 +63,16 @@ module Banzai doc end + private + + # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme + def contains_unsafe?(scheme) + return false unless scheme + + scheme = scheme.strip.downcase + Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) } + end + def autolink_match(match) # start by stripping out dangerous links begin @@ -112,12 +88,30 @@ module Banzai match.gsub!(/((?:&[\w#]+;)+)\z/, '') dropped = ($1 || '').html_safe + # To match the behaviour of Rinku, if the matched link ends with a + # closing part of a matched pair of punctuation, we remove that trailing + # character unless there are an equal number of closing and opening + # characters in the link. + if match.end_with?(*PUNCTUATION_PAIRS.keys) + close_character = match[-1] + close_count = match.count(close_character) + open_character = PUNCTUATION_PAIRS[close_character] + open_count = match.count(open_character) + + if open_count != close_count || open_character == close_character + dropped += close_character + match = match[0..-2] + end + end + options = link_options.merge(href: match) - content_tag(:a, match, options) + dropped + content_tag(:a, match.html_safe, options) + dropped end def autolink_filter(text) - text.gsub(LINK_PATTERN) { |match| autolink_match(match) } + Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:| + autolink_match(link) + end end def link_options |