summaryrefslogtreecommitdiff
path: root/lib/banzai
diff options
context:
space:
mode:
authorBrett Walker <bwalker@gitlab.com>2019-01-14 16:57:54 -0600
committerYorick Peterse <yorickpeterse@gmail.com>2019-01-31 16:52:20 +0100
commit40983f4a9c960bac4eb59a54816ae63177015c51 (patch)
tree6d4a085affdd9af4dd351f4b69b044549b93da28 /lib/banzai
parentb3c13bbb3c62c90dbb9a606b27699df8d681cec3 (diff)
downloadgitlab-ce-40983f4a9c960bac4eb59a54816ae63177015c51.tar.gz
Show tooltip for malicious looking links
Such as those with IDN homographs or embedded right-to-left (RTLO) characters. Autolinked hrefs should be escaped
Diffstat (limited to 'lib/banzai')
-rw-r--r--lib/banzai/filter/autolink_filter.rb13
-rw-r--r--lib/banzai/filter/external_link_filter.rb85
-rw-r--r--lib/banzai/pipeline/email_pipeline.rb1
3 files changed, 88 insertions, 11 deletions
diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb
index deda4b1872e..f3061bad4ff 100644
--- a/lib/banzai/filter/autolink_filter.rb
+++ b/lib/banzai/filter/autolink_filter.rb
@@ -8,6 +8,10 @@ module Banzai
#
# Based on HTML::Pipeline::AutolinkFilter
#
+ # Note that our CommonMark parser, `commonmarker` (using the autolink extension)
+ # handles standard autolinking, like http/https. We detect additional
+ # schemes (smb, rdar, etc).
+ #
# Context options:
# :autolink - Boolean, skips all processing done by this filter when false
# :link_attr - Hash of attributes for the generated links
@@ -107,10 +111,13 @@ module Banzai
end
end
- # match has come from node.to_html above, so we know it's encoded
- # correctly.
+ # Since this came from a Text node, make sure the new href is encoded.
+ # `commonmarker` percent encodes the domains of links it handles, so
+ # do the same (instead of using `normalized_encode`).
+ href_safe = Addressable::URI.encode(match).html_safe
+
html_safe_match = match.html_safe
- options = link_options.merge(href: html_safe_match)
+ options = link_options.merge(href: href_safe)
content_tag(:a, html_safe_match, options) + dropped
end
diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb
index 4f60b6f84c6..61ee3eac216 100644
--- a/lib/banzai/filter/external_link_filter.rb
+++ b/lib/banzai/filter/external_link_filter.rb
@@ -4,17 +4,29 @@ module Banzai
module Filter
# HTML Filter to modify the attributes of external links
class ExternalLinkFilter < HTML::Pipeline::Filter
- SCHEMES = ['http', 'https', nil].freeze
+ SCHEMES = ['http', 'https', nil].freeze
+ RTLO = "\u202E".freeze
+ ENCODED_RTLO = '%E2%80%AE'.freeze
def call
links.each do |node|
- uri = uri(node['href'].to_s)
-
- node.set_attribute('href', uri.to_s) if uri
+ # URI.parse does stricter checking on the url than Addressable,
+ # such as on `mailto:` links. Since we've been using it, do an
+ # initial parse for validity and then use Addressable
+ # for IDN support, etc
+ uri = uri_strict(node['href'].to_s)
+ if uri
+ node.set_attribute('href', uri.to_s)
+ addressable_uri = addressable_uri(node['href'])
+ else
+ addressable_uri = nil
+ end
- if SCHEMES.include?(uri&.scheme) && !internal_url?(uri)
- node.set_attribute('rel', 'nofollow noreferrer noopener')
- node.set_attribute('target', '_blank')
+ unless internal_url?(addressable_uri)
+ punycode_autolink_node!(addressable_uri, node)
+ sanitize_link_text!(node)
+ add_malicious_tooltip!(addressable_uri, node)
+ add_nofollow!(addressable_uri, node)
end
end
@@ -23,12 +35,18 @@ module Banzai
private
- def uri(href)
+ def uri_strict(href)
URI.parse(href)
rescue URI::Error
nil
end
+ def addressable_uri(href)
+ Addressable::URI.parse(href)
+ rescue Addressable::URI::InvalidURIError
+ nil
+ end
+
def links
query = 'descendant-or-self::a[@href and not(@href = "")]'
doc.xpath(query)
@@ -45,6 +63,57 @@ module Banzai
def internal_url
@internal_url ||= URI.parse(Gitlab.config.gitlab.url)
end
+
+ # Only replace an autolink with an IDN with it's punycode
+ # version if we need emailable links. Otherwise let it
+ # be shown normally and the tooltips will show the
+ # punycode version.
+ def punycode_autolink_node!(uri, node)
+ return unless uri
+ return unless context[:emailable_links]
+
+ unencoded_uri_str = Addressable::URI.unencode(node['href'])
+
+ if unencoded_uri_str == node.content && idn?(uri)
+ node.content = uri.normalize
+ end
+ end
+
+ # escape any right-to-left (RTLO) characters in link text
+ def sanitize_link_text!(node)
+ node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO)
+ end
+
+ # If the domain is an international domain name (IDN),
+ # let's expose with a tooltip in case it's intended
+ # to be malicious. This is particularly useful for links
+ # where the link text is not the same as the actual link.
+ # We will continue to show the unicode version of the domain
+ # in autolinked link text, which could contain emojis, etc.
+ #
+ # Also show the tooltip if the url contains the RTLO character,
+ # as this is an indicator of a malicious link
+ def add_malicious_tooltip!(uri, node)
+ if idn?(uri) || has_encoded_rtlo?(uri)
+ node.add_class('has-tooltip')
+ node.set_attribute('title', uri.normalize)
+ end
+ end
+
+ def add_nofollow!(uri, node)
+ if SCHEMES.include?(uri&.scheme)
+ node.set_attribute('rel', 'nofollow noreferrer noopener')
+ node.set_attribute('target', '_blank')
+ end
+ end
+
+ def idn?(uri)
+ uri&.normalized_host&.start_with?('xn--')
+ end
+
+ def has_encoded_rtlo?(uri)
+ uri&.to_s&.include?(ENCODED_RTLO)
+ end
end
end
end
diff --git a/lib/banzai/pipeline/email_pipeline.rb b/lib/banzai/pipeline/email_pipeline.rb
index 0f4dd9d143d..13e6a990407 100644
--- a/lib/banzai/pipeline/email_pipeline.rb
+++ b/lib/banzai/pipeline/email_pipeline.rb
@@ -12,6 +12,7 @@ module Banzai
def self.transform_context(context)
super(context).merge(
only_path: false,
+ emailable_links: true,
no_sourcepos: true
)
end