summaryrefslogtreecommitdiff
path: root/lib/banzai
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2018-03-05 15:39:36 +0000
committerDouwe Maan <douwe@gitlab.com>2018-03-05 15:39:36 +0000
commit089b2fb9fc4aace5dca759eec87c980c780192d5 (patch)
tree5659343c54fced2149aa252b75c62b82e6f16641 /lib/banzai
parentfa1debefe7feb7980863aaa383d5a02e488b9a94 (diff)
parentcb55bc3c0770adf7122d7cf49b12cb45c43de7ec (diff)
downloadgitlab-ce-089b2fb9fc4aace5dca759eec87c980c780192d5.tar.gz
Merge branch '41719-mr-title-fix' into 'master'
Render htmlentities correctly for links not supported by Rinku Closes #41719 See merge request gitlab-org/gitlab-ce!17180
Diffstat (limited to 'lib/banzai')
-rw-r--r--lib/banzai/filter/autolink_filter.rb86
1 files changed, 40 insertions, 46 deletions
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index b8d2673c1a6..75b64ae9af2 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -25,8 +25,8 @@ module Banzai
# period or comma for punctuation without those characters being included
# in the generated link.
#
- # Rubular: http://rubular.com/r/cxjPyZc7Sb
- LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}
+ # Rubular: http://rubular.com/r/JzPhi6DCZp
+ LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://[^\s>]+)(?<!,|\.)}
# Text matching LINK_PATTERN inside these elements will not be linked
IGNORE_PARENTS = %w(a code kbd pre script style).to_set
@@ -35,53 +35,19 @@ module Banzai
TEXT_QUERY = %Q(descendant-or-self::text()[
not(#{IGNORE_PARENTS.map { |p| "ancestor::#{p}" }.join(' or ')})
and contains(., '://')
- and not(starts-with(., 'http'))
- and not(starts-with(., 'ftp'))
]).freeze
+ PUNCTUATION_PAIRS = {
+ "'" => "'",
+ '"' => '"',
+ ')' => '(',
+ ']' => '[',
+ '}' => '{'
+ }.freeze
+
def call
return doc if context[:autolink] == false
- rinku_parse
- text_parse
- end
-
- private
-
- # Run the text through Rinku as a first pass
- #
- # This will quickly autolink http(s) and ftp links.
- #
- # `@doc` will be re-parsed with the HTML String from Rinku.
- def rinku_parse
- # Convert the options from a Hash to a String that Rinku expects
- options = tag_options(link_options)
-
- # NOTE: We don't parse email links because it will erroneously match
- # external Commit and CommitRange references.
- #
- # The final argument tells Rinku to link short URLs that don't include a
- # period (e.g., http://localhost:3000/)
- rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
-
- return if rinku == html
-
- # Rinku returns a String, so parse it back to a Nokogiri::XML::Document
- # for further processing.
- @doc = parse_html(rinku)
- end
-
- # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
- def contains_unsafe?(scheme)
- return false unless scheme
-
- scheme = scheme.strip.downcase
- Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
- end
-
- # Autolinks any text matching LINK_PATTERN that Rinku didn't already
- # replace
- def text_parse
doc.xpath(TEXT_QUERY).each do |node|
content = node.to_html
@@ -97,6 +63,16 @@ module Banzai
doc
end
+ private
+
+ # Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
+ def contains_unsafe?(scheme)
+ return false unless scheme
+
+ scheme = scheme.strip.downcase
+ Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
+ end
+
def autolink_match(match)
# start by stripping out dangerous links
begin
@@ -112,12 +88,30 @@ module Banzai
match.gsub!(/((?:&[\w#]+;)+)\z/, '')
dropped = ($1 || '').html_safe
+ # To match the behaviour of Rinku, if the matched link ends with a
+ # closing part of a matched pair of punctuation, we remove that trailing
+ # character unless there are an equal number of closing and opening
+ # characters in the link.
+ if match.end_with?(*PUNCTUATION_PAIRS.keys)
+ close_character = match[-1]
+ close_count = match.count(close_character)
+ open_character = PUNCTUATION_PAIRS[close_character]
+ open_count = match.count(open_character)
+
+ if open_count != close_count || open_character == close_character
+ dropped += close_character
+ match = match[0..-2]
+ end
+ end
+
options = link_options.merge(href: match)
- content_tag(:a, match, options) + dropped
+ content_tag(:a, match.html_safe, options) + dropped
end
def autolink_filter(text)
- text.gsub(LINK_PATTERN) { |match| autolink_match(match) }
+ Gitlab::StringRegexMarker.new(CGI.unescapeHTML(text), text.html_safe).mark(LINK_PATTERN) do |link, left:, right:|
+ autolink_match(link)
+ end
end
def link_options