diff options
| author | Douwe Maan <douwe@gitlab.com> | 2015-12-15 15:51:16 +0100 |
|---|---|---|
| committer | Douwe Maan <douwe@gitlab.com> | 2015-12-15 15:51:16 +0100 |
| commit | 7781bda9bd82997f4a03de4cf911b1156ceb2cde (patch) | |
| tree | a632a12b295694232205e2190f784f9bb79235ee /lib/banzai | |
| parent | 9451db3819ae45734c4343e55a74d347cdacf70d (diff) | |
| download | gitlab-ce-7781bda9bd82997f4a03de4cf911b1156ceb2cde.tar.gz | |
Move Markdown/reference logic from Gitlab::Markdown to Banzai
Diffstat (limited to 'lib/banzai')
41 files changed, 2072 insertions, 0 deletions
diff --git a/lib/banzai/cross_project_reference.rb b/lib/banzai/cross_project_reference.rb new file mode 100644 index 00000000000..ba2866e1efa --- /dev/null +++ b/lib/banzai/cross_project_reference.rb @@ -0,0 +1,22 @@ +require 'banzai' + +module Banzai + # Common methods for ReferenceFilters that support an optional cross-project + # reference. + module CrossProjectReference + # Given a cross-project reference string, get the Project record + # + # Defaults to value of `context[:project]` if: + # * No reference is given OR + # * Reference given doesn't exist + # + # ref - String reference. + # + # Returns a Project, or nil if the reference can't be found + def project_from_ref(ref) + return context[:project] unless ref + + Project.find_with_namespace(ref) + end + end +end diff --git a/lib/banzai/filter.rb b/lib/banzai/filter.rb new file mode 100644 index 00000000000..fd4fe024252 --- /dev/null +++ b/lib/banzai/filter.rb @@ -0,0 +1,10 @@ +require 'active_support/core_ext/string/output_safety' +require 'banzai' + +module Banzai + module Filter + def self.[](name) + const_get("#{name.to_s.camelize}Filter") + end + end +end diff --git a/lib/banzai/filter/abstract_reference_filter.rb b/lib/banzai/filter/abstract_reference_filter.rb new file mode 100644 index 00000000000..bdaa4721b4b --- /dev/null +++ b/lib/banzai/filter/abstract_reference_filter.rb @@ -0,0 +1,145 @@ +require 'banzai' + +module Banzai + module Filter + # Issues, Merge Requests, Snippets, Commits and Commit Ranges share + # similar functionality in reference filtering. + class AbstractReferenceFilter < ReferenceFilter + include CrossProjectReference + + def self.object_class + # Implement in child class + # Example: MergeRequest + end + + def self.object_name + object_class.name.underscore + end + + def self.object_sym + object_name.to_sym + end + + def self.data_reference + "data-#{object_name.dasherize}" + end + + # Public: Find references in text (like `!123` for merge requests) + # + # AnyReferenceFilter.references_in(text) do |match, id, project_ref, matches| + # object = find_object(project_ref, id) + # "<a href=...>#{object.to_reference}</a>" + # end + # + # text - String text to search. + # + # Yields the String match, the Integer referenced object ID, an optional String + # of the external project reference, and all of the matchdata. + # + # Returns a String replaced with the return of the block. + def self.references_in(text, pattern = object_class.reference_pattern) + text.gsub(pattern) do |match| + yield match, $~[object_sym].to_i, $~[:project], $~ + end + end + + def self.referenced_by(node) + { object_sym => LazyReference.new(object_class, node.attr(data_reference)) } + end + + delegate :object_class, :object_sym, :references_in, to: :class + + def find_object(project, id) + # Implement in child class + # Example: project.merge_requests.find + end + + def url_for_object(object, project) + # Implement in child class + # Example: project_merge_request_url + end + + def call + # `#123` + replace_text_nodes_matching(object_class.reference_pattern) do |content| + object_link_filter(content, object_class.reference_pattern) + end + + # `[Issue](#123)`, which is turned into + # `<a href="#123">Issue</a>` + replace_link_nodes_with_href(object_class.reference_pattern) do |link, text| + object_link_filter(link, object_class.reference_pattern, link_text: text) + end + + # `http://gitlab.example.com/namespace/project/issues/123`, which is turned into + # `<a href="http://gitlab.example.com/namespace/project/issues/123">http://gitlab.example.com/namespace/project/issues/123</a>` + replace_link_nodes_with_text(object_class.link_reference_pattern) do |text| + object_link_filter(text, object_class.link_reference_pattern) + end + + # `[Issue](http://gitlab.example.com/namespace/project/issues/123)`, which is turned into + # `<a href="http://gitlab.example.com/namespace/project/issues/123">Issue</a>` + replace_link_nodes_with_href(object_class.link_reference_pattern) do |link, text| + object_link_filter(link, object_class.link_reference_pattern, link_text: text) + end + end + + # Replace references (like `!123` for merge requests) in text with links + # to the referenced object's details page. + # + # text - String text to replace references in. + # pattern - Reference pattern to match against. + # link_text - Original content of the link being replaced. + # + # Returns a String with references replaced with links. All links + # have `gfm` and `gfm-OBJECT_NAME` class names attached for styling. + def object_link_filter(text, pattern, link_text: nil) + references_in(text, pattern) do |match, id, project_ref, matches| + project = project_from_ref(project_ref) + + if project && object = find_object(project, id) + title = escape_once(object_link_title(object)) + klass = reference_class(object_sym) + + data = data_attribute( + original: link_text || match, + project: project.id, + object_sym => object.id + ) + + url = matches[:url] if matches.names.include?("url") + url ||= url_for_object(object, project) + + text = link_text + unless text + text = object.reference_link_text(context[:project]) + + extras = object_link_text_extras(object, matches) + text += " (#{extras.join(", ")})" if extras.any? + end + + %(<a href="#{url}" #{data} + title="#{title}" + class="#{klass}">#{text}</a>) + else + match + end + end + end + + def object_link_text_extras(object, matches) + extras = [] + + if matches.names.include?("anchor") && matches[:anchor] && matches[:anchor] =~ /\A\#note_(\d+)\z/ + extras << "comment #{$1}" + end + + extras + end + + def object_link_title(object) + "#{object_class.name.titleize}: #{object.title}" + end + end + end +end diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb new file mode 100644 index 00000000000..da4ee80c1b5 --- /dev/null +++ b/lib/banzai/filter/autolink_filter.rb @@ -0,0 +1,107 @@ +require 'banzai' +require 'html/pipeline/filter' +require 'uri' + +module Banzai + module Filter + # HTML Filter for auto-linking URLs in HTML. + # + # Based on HTML::Pipeline::AutolinkFilter + # + # Context options: + # :autolink - Boolean, skips all processing done by this filter when false + # :link_attr - Hash of attributes for the generated links + # + class AutolinkFilter < HTML::Pipeline::Filter + include ActionView::Helpers::TagHelper + + # Pattern to match text that should be autolinked. + # + # A URI scheme begins with a letter and may contain letters, numbers, + # plus, period and hyphen. Schemes are case-insensitive but we're being + # picky here and allowing only lowercase for autolinks. + # + # See http://en.wikipedia.org/wiki/URI_scheme + # + # The negative lookbehind ensures that users can paste a URL followed by a + # period or comma for punctuation without those characters being included + # in the generated link. + # + # Rubular: http://rubular.com/r/cxjPyZc7Sb + LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)} + + # Text matching LINK_PATTERN inside these elements will not be linked + IGNORE_PARENTS = %w(a code kbd pre script style).to_set + + def call + return doc if context[:autolink] == false + + rinku_parse + text_parse + end + + private + + # Run the text through Rinku as a first pass + # + # This will quickly autolink http(s) and ftp links. + # + # `@doc` will be re-parsed with the HTML String from Rinku. + def rinku_parse + # Convert the options from a Hash to a String that Rinku expects + options = tag_options(link_options) + + # NOTE: We don't parse email links because it will erroneously match + # external Commit and CommitRange references. + # + # The final argument tells Rinku to link short URLs that don't include a + # period (e.g., http://localhost:3000/) + rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1) + + # Rinku returns a String, so parse it back to a Nokogiri::XML::Document + # for further processing. + @doc = parse_html(rinku) + end + + # Autolinks any text matching LINK_PATTERN that Rinku didn't already + # replace + def text_parse + search_text_nodes(doc).each do |node| + content = node.to_html + + next if has_ancestor?(node, IGNORE_PARENTS) + next unless content.match(LINK_PATTERN) + + # If Rinku didn't link this, there's probably a good reason, so we'll + # skip it too + next if content.start_with?(*%w(http https ftp)) + + html = autolink_filter(content) + + next if html == content + + node.replace(html) + end + + doc + end + + def autolink_filter(text) + text.gsub(LINK_PATTERN) do |match| + # Remove any trailing HTML entities and store them for appending + # outside the link element. The entity must be marked HTML safe in + # order to be output literally rather than escaped. + match.gsub!(/((?:&[\w#]+;)+)\z/, '') + dropped = ($1 || '').html_safe + + options = link_options.merge(href: match) + content_tag(:a, match, options) + dropped + end + end + + def link_options + @link_options ||= context[:link_attr] || {} + end + end + end +end diff --git a/lib/banzai/filter/commit_range_reference_filter.rb b/lib/banzai/filter/commit_range_reference_filter.rb new file mode 100644 index 00000000000..e67cd45ab9b --- /dev/null +++ b/lib/banzai/filter/commit_range_reference_filter.rb @@ -0,0 +1,58 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces commit range references with links. + # + # This filter supports cross-project references. + class CommitRangeReferenceFilter < AbstractReferenceFilter + def self.object_class + CommitRange + end + + def self.references_in(text, pattern = CommitRange.reference_pattern) + text.gsub(pattern) do |match| + yield match, $~[:commit_range], $~[:project], $~ + end + end + + def self.referenced_by(node) + project = Project.find(node.attr("data-project")) rescue nil + return unless project + + id = node.attr("data-commit-range") + range = find_object(project, id) + + return unless range + + { commit_range: range } + end + + def initialize(*args) + super + + @commit_map = {} + end + + def self.find_object(project, id) + range = CommitRange.new(id, project) + + range.valid_commits? ? range : nil + end + + def find_object(*args) + self.class.find_object(*args) + end + + def url_for_object(range, project) + h = Gitlab::Application.routes.url_helpers + h.namespace_project_compare_url(project.namespace, project, + range.to_param.merge(only_path: context[:only_path])) + end + + def object_link_title(range) + range.reference_title + end + end + end +end diff --git a/lib/banzai/filter/commit_reference_filter.rb b/lib/banzai/filter/commit_reference_filter.rb new file mode 100644 index 00000000000..9e57608b483 --- /dev/null +++ b/lib/banzai/filter/commit_reference_filter.rb @@ -0,0 +1,63 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces commit references with links. + # + # This filter supports cross-project references. + class CommitReferenceFilter < AbstractReferenceFilter + def self.object_class + Commit + end + + def self.references_in(text, pattern = Commit.reference_pattern) + text.gsub(pattern) do |match| + yield match, $~[:commit], $~[:project], $~ + end + end + + def self.referenced_by(node) + project = Project.find(node.attr("data-project")) rescue nil + return unless project + + id = node.attr("data-commit") + commit = find_object(project, id) + + return unless commit + + { commit: commit } + end + + def self.find_object(project, id) + if project && project.valid_repo? + project.commit(id) + end + end + + def find_object(*args) + self.class.find_object(*args) + end + + def url_for_object(commit, project) + h = Gitlab::Application.routes.url_helpers + h.namespace_project_commit_url(project.namespace, project, commit, + only_path: context[:only_path]) + end + + def object_link_title(commit) + commit.link_title + end + + def object_link_text_extras(object, matches) + extras = super + + path = matches[:path] if matches.names.include?("path") + if path == '/builds' + extras.unshift "builds" + end + + extras + end + end + end +end diff --git a/lib/banzai/filter/emoji_filter.rb b/lib/banzai/filter/emoji_filter.rb new file mode 100644 index 00000000000..86838e1483c --- /dev/null +++ b/lib/banzai/filter/emoji_filter.rb @@ -0,0 +1,80 @@ +require 'action_controller' +require 'banzai' +require 'gitlab_emoji' +require 'html/pipeline/filter' + +module Banzai + module Filter + # HTML filter that replaces :emoji: with images. + # + # Based on HTML::Pipeline::EmojiFilter + # + # Context options: + # :asset_root + # :asset_host + class EmojiFilter < HTML::Pipeline::Filter + IGNORED_ANCESTOR_TAGS = %w(pre code tt).to_set + + def call + search_text_nodes(doc).each do |node| + content = node.to_html + next unless content.include?(':') + next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) + + html = emoji_image_filter(content) + + next if html == content + + node.replace(html) + end + + doc + end + + # Replace :emoji: with corresponding images. + # + # text - String text to replace :emoji: in. + # + # Returns a String with :emoji: replaced with images. + def emoji_image_filter(text) + text.gsub(emoji_pattern) do |match| + name = $1 + "<img class='emoji' title=':#{name}:' alt=':#{name}:' src='#{emoji_url(name)}' height='20' width='20' align='absmiddle' />" + end + end + + private + + def emoji_url(name) + emoji_path = "emoji/#{emoji_filename(name)}" + if context[:asset_host] + # Asset host is specified. + url_to_image(emoji_path) + elsif context[:asset_root] + # Gitlab url is specified + File.join(context[:asset_root], url_to_image(emoji_path)) + else + # All other cases + url_to_image(emoji_path) + end + end + + def url_to_image(image) + ActionController::Base.helpers.url_to_image(image) + end + + # Build a regexp that matches all valid :emoji: names. + def self.emoji_pattern + @emoji_pattern ||= /:(#{Emoji.emojis_names.map { |name| Regexp.escape(name) }.join('|')}):/ + end + + def emoji_pattern + self.class.emoji_pattern + end + + def emoji_filename(name) + "#{Emoji.emoji_filename(name)}.png" + end + end + end +end diff --git a/lib/banzai/filter/external_issue_reference_filter.rb b/lib/banzai/filter/external_issue_reference_filter.rb new file mode 100644 index 00000000000..f5737a7ac19 --- /dev/null +++ b/lib/banzai/filter/external_issue_reference_filter.rb @@ -0,0 +1,69 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces external issue tracker references with links. + # References are ignored if the project doesn't use an external issue + # tracker. + class ExternalIssueReferenceFilter < ReferenceFilter + # Public: Find `JIRA-123` issue references in text + # + # ExternalIssueReferenceFilter.references_in(text) do |match, issue| + # "<a href=...>##{issue}</a>" + # end + # + # text - String text to search. + # + # Yields the String match and the String issue reference. + # + # Returns a String replaced with the return of the block. + def self.references_in(text) + text.gsub(ExternalIssue.reference_pattern) do |match| + yield match, $~[:issue] + end + end + + def call + # Early return if the project isn't using an external tracker + return doc if project.nil? || project.default_issues_tracker? + + replace_text_nodes_matching(ExternalIssue.reference_pattern) do |content| + issue_link_filter(content) + end + + replace_link_nodes_with_href(ExternalIssue.reference_pattern) do |link, text| + issue_link_filter(link, link_text: text) + end + end + + # Replace `JIRA-123` issue references in text with links to the referenced + # issue's details page. + # + # text - String text to replace references in. + # + # Returns a String with `JIRA-123` references replaced with links. All + # links have `gfm` and `gfm-issue` class names attached for styling. + def issue_link_filter(text, link_text: nil) + project = context[:project] + + self.class.references_in(text) do |match, issue| + url = url_for_issue(issue, project, only_path: context[:only_path]) + + title = escape_once("Issue in #{project.external_issue_tracker.title}") + klass = reference_class(:issue) + data = data_attribute(project: project.id) + + text = link_text || match + + %(<a href="#{url}" #{data} + title="#{title}" + class="#{klass}">#{text}</a>) + end + end + + def url_for_issue(*args) + IssuesHelper.url_for_issue(*args) + end + end + end +end diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb new file mode 100644 index 00000000000..ac87b9820af --- /dev/null +++ b/lib/banzai/filter/external_link_filter.rb @@ -0,0 +1,34 @@ +require 'banzai' +require 'html/pipeline/filter' + +module Banzai + module Filter + # HTML Filter to add a `rel="nofollow"` attribute to external links + # + class ExternalLinkFilter < HTML::Pipeline::Filter + def call + doc.search('a').each do |node| + link = node.attr('href') + + next unless link + + # Skip non-HTTP(S) links + next unless link.start_with?('http') + + # Skip internal links + next if link.start_with?(internal_url) + + node.set_attribute('rel', 'nofollow') + end + + doc + end + + private + + def internal_url + @internal_url ||= Gitlab.config.gitlab.url + end + end + end +end diff --git a/lib/banzai/filter/issue_reference_filter.rb b/lib/banzai/filter/issue_reference_filter.rb new file mode 100644 index 00000000000..51180cb901a --- /dev/null +++ b/lib/banzai/filter/issue_reference_filter.rb @@ -0,0 +1,23 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces issue references with links. References to + # issues that do not exist are ignored. + # + # This filter supports cross-project references. + class IssueReferenceFilter < AbstractReferenceFilter + def self.object_class + Issue + end + + def find_object(project, id) + project.get_issue(id) + end + + def url_for_object(issue, project) + IssuesHelper.url_for_issue(issue.iid, project, only_path: context[:only_path]) + end + end + end +end diff --git a/lib/banzai/filter/label_reference_filter.rb b/lib/banzai/filter/label_reference_filter.rb new file mode 100644 index 00000000000..07bac2dd7fd --- /dev/null +++ b/lib/banzai/filter/label_reference_filter.rb @@ -0,0 +1,96 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces label references with links. + class LabelReferenceFilter < ReferenceFilter + # Public: Find label references in text + # + # LabelReferenceFilter.references_in(text) do |match, id, name| + # "<a href=...>#{Label.find(id)}</a>" + # end + # + # text - String text to search. + # + # Yields the String match, an optional Integer label ID, and an optional + # String label name. + # + # Returns a String replaced with the return of the block. + def self.references_in(text) + text.gsub(Label.reference_pattern) do |match| + yield match, $~[:label_id].to_i, $~[:label_name] + end + end + + def self.referenced_by(node) + { label: LazyReference.new(Label, node.attr("data-label")) } + end + + def call + replace_text_nodes_matching(Label.reference_pattern) do |content| + label_link_filter(content) + end + + replace_link_nodes_with_href(Label.reference_pattern) do |link, text| + label_link_filter(link, link_text: text) + end + end + + # Replace label references in text with links to the label specified. + # + # text - String text to replace references in. + # + # Returns a String with label references replaced with links. All links + # have `gfm` and `gfm-label` class names attached for styling. + def label_link_filter(text, link_text: nil) + project = context[:project] + + self.class.references_in(text) do |match, id, name| + params = label_params(id, name) + + if label = project.labels.find_by(params) + url = url_for_label(project, label) + klass = reference_class(:label) + data = data_attribute( + original: link_text || match, + project: project.id, + label: label.id + ) + + text = link_text || render_colored_label(label) + + %(<a href="#{url}" #{data} + class="#{klass}">#{text}</a>) + else + match + end + end + end + + def url_for_label(project, label) + h = Gitlab::Application.routes.url_helpers + h.namespace_project_issues_url( project.namespace, project, label_name: label.name, + only_path: context[:only_path]) + end + + def render_colored_label(label) + LabelsHelper.render_colored_label(label) + end + + # Parameters to pass to `Label.find_by` based on the given arguments + # + # id - Integer ID to pass. If present, returns {id: id} + # name - String name to pass. If `id` is absent, finds by name without + # surrounding quotes. + # + # Returns a Hash. + def label_params(id, name) + if name + { name: name.tr('"', '') } + else + { id: id } + end + end + end + end +end diff --git a/lib/banzai/filter/markdown_filter.rb b/lib/banzai/filter/markdown_filter.rb new file mode 100644 index 00000000000..0072bab1f99 --- /dev/null +++ b/lib/banzai/filter/markdown_filter.rb @@ -0,0 +1,42 @@ +require 'banzai' +require 'html/pipeline/filter' + +module Banzai + module Filter + class MarkdownFilter < HTML::Pipeline::TextFilter + def initialize(text, context = nil, result = nil) + super text, context, result + @text = @text.gsub "\r", '' + end + + def call + html = self.class.renderer.render(@text) + html.rstrip! + html + end + + private + + def self.redcarpet_options + # https://github.com/vmg/redcarpet#and-its-like-really-simple-to-use + @redcarpet_options ||= { + fenced_code_blocks: true, + footnotes: true, + lax_spacing: true, + no_intra_emphasis: true, + space_after_headers: true, + strikethrough: true, + superscript: true, + tables: true + }.freeze + end + + def self.renderer + @renderer ||= begin + renderer = Redcarpet::Render::HTML.new + Redcarpet::Markdown.new(renderer, redcarpet_options) + end + end + end + end +end diff --git a/lib/banzai/filter/merge_request_reference_filter.rb b/lib/banzai/filter/merge_request_reference_filter.rb new file mode 100644 index 00000000000..755b946a34b --- /dev/null +++ b/lib/banzai/filter/merge_request_reference_filter.rb @@ -0,0 +1,41 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces merge request references with links. References + # to merge requests that do not exist are ignored. + # + # This filter supports cross-project references. + class MergeRequestReferenceFilter < AbstractReferenceFilter + def self.object_class + MergeRequest + end + + def find_object(project, id) + project.merge_requests.find_by(iid: id) + end + + def url_for_object(mr, project) + h = Gitlab::Application.routes.url_helpers + h.namespace_project_merge_request_url(project.namespace, project, mr, + only_path: context[:only_path]) + end + + def object_link_text_extras(object, matches) + extras = super + + path = matches[:path] if matches.names.include?("path") + case path + when '/diffs' + extras.unshift "diffs" + when '/commits' + extras.unshift "commits" + when '/builds' + extras.unshift "builds" + end + + extras + end + end + end +end diff --git a/lib/banzai/filter/redactor_filter.rb b/lib/banzai/filter/redactor_filter.rb new file mode 100644 index 00000000000..89e7a79789a --- /dev/null +++ b/lib/banzai/filter/redactor_filter.rb @@ -0,0 +1,43 @@ +require 'banzai' +require 'html/pipeline/filter' + +module Banzai + module Filter + # HTML filter that removes references to records that the current user does + # not have permission to view. + # + # Expected to be run in its own post-processing pipeline. + # + class RedactorFilter < HTML::Pipeline::Filter + def call + doc.css('a.gfm').each do |node| + unless user_can_reference?(node) + # The reference should be replaced by the original text, + # which is not always the same as the rendered text. + text = node.attr('data-original') || node.text + node.replace(text) + end + end + + doc + end + + private + + def user_can_reference?(node) + if node.has_attribute?('data-reference-filter') + reference_type = node.attr('data-reference-filter') + reference_filter = Banzai::Filter.const_get(reference_type) + + reference_filter.user_can_reference?(current_user, node, context) + else + true + end + end + + def current_user + context[:current_user] + end + end + end +end diff --git a/lib/banzai/filter/reference_filter.rb b/lib/banzai/filter/reference_filter.rb new file mode 100644 index 00000000000..33457a3f361 --- /dev/null +++ b/lib/banzai/filter/reference_filter.rb @@ -0,0 +1,190 @@ +require 'active_support/core_ext/string/output_safety' +require 'banzai' +require 'html/pipeline/filter' + +module Banzai + module Filter + # Base class for GitLab Flavored Markdown reference filters. + # + # References within <pre>, <code>, <a>, and <style> elements are ignored. + # + # Context options: + # :project (required) - Current project, ignored if reference is cross-project. + # :only_path - Generate path-only links. + class ReferenceFilter < HTML::Pipeline::Filter + def self.user_can_reference?(user, node, context) + if node.has_attribute?('data-project') + project_id = node.attr('data-project').to_i + return true if project_id == context[:project].try(:id) + + project = Project.find(project_id) rescue nil + Ability.abilities.allowed?(user, :read_project, project) + else + true + end + end + + def self.referenced_by(node) + raise NotImplementedError, "#{self} does not implement #{__method__}" + end + + # Returns a data attribute String to attach to a reference link + # + # attributes - Hash, where the key becomes the data attribute name and the + # value is the data attribute value + # + # Examples: + # + # data_attribute(project: 1, issue: 2) + # # => "data-reference-filter=\"SomeReferenceFilter\" data-project=\"1\" data-issue=\"2\"" + # + # data_attribute(project: 3, merge_request: 4) + # # => "data-reference-filter=\"SomeReferenceFilter\" data-project=\"3\" data-merge-request=\"4\"" + # + # Returns a String + def data_attribute(attributes = {}) + attributes[:reference_filter] = self.class.name.demodulize + attributes.map { |key, value| %Q(data-#{key.to_s.dasherize}="#{value}") }.join(" ") + end + + def escape_once(html) + ERB::Util.html_escape_once(html) + end + + def ignore_parents + @ignore_parents ||= begin + # Don't look for references in text nodes that are children of these + # elements. + parents = %w(pre code a style) + parents << 'blockquote' if context[:ignore_blockquotes] + parents.to_set + end + end + + def ignored_ancestry?(node) + has_ancestor?(node, ignore_parents) + end + + def project + context[:project] + end + + def reference_class(type) + "gfm gfm-#{type}" + end + + # Iterate through the document's text nodes, yielding the current node's + # content if: + # + # * The `project` context value is present AND + # * The node's content matches `pattern` AND + # * The node is not an ancestor of an ignored node type + # + # pattern - Regex pattern against which to match the node's content + # + # Yields the current node's String contents. The result of the block will + # replace the node's existing content and update the current document. + # + # Returns the updated Nokogiri::HTML::DocumentFragment object. + def replace_text_nodes_matching(pattern) + return doc if project.nil? + + search_text_nodes(doc).each do |node| + next if ignored_ancestry?(node) + next unless node.text =~ pattern + + content = node.to_html + + html = yield content + + next if html == content + + node.replace(html) + end + + doc + end + + # Iterate through the document's link nodes, yielding the current node's + # content if: + # + # * The `project` context value is present AND + # * The node's content matches `pattern` + # + # pattern - Regex pattern against which to match the node's content + # + # Yields the current node's String contents. The result of the block will + # replace the node and update the current document. + # + # Returns the updated Nokogiri::HTML::DocumentFragment object. + def replace_link_nodes_with_text(pattern) + return doc if project.nil? + + doc.search('a').each do |node| + klass = node.attr('class') + next if klass && klass.include?('gfm') + + link = node.attr('href') + text = node.text + + next unless link && text + + link = URI.decode(link) + # Ignore ending punctionation like periods or commas + next unless link == text && text =~ /\A#{pattern}/ + + html = yield text + + next if html == text + + node.replace(html) + end + + doc + end + + # Iterate through the document's link nodes, yielding the current node's + # content if: + # + # * The `project` context value is present AND + # * The node's HREF matches `pattern` + # + # pattern - Regex pattern against which to match the node's HREF + # + # Yields the current node's String HREF and String content. + # The result of the block will replace the node and update the current document. + # + # Returns the updated Nokogiri::HTML::DocumentFragment object. + def replace_link_nodes_with_href(pattern) + return doc if project.nil? + + doc.search('a').each do |node| + klass = node.attr('class') + next if klass && klass.include?('gfm') + + link = node.attr('href') + text = node.text + + next unless link && text + link = URI.decode(link) + next unless link && link =~ /\A#{pattern}\z/ + + html = yield link, text + + next if html == link + + node.replace(html) + end + + doc + end + + # Ensure that a :project key exists in context + # + # Note that while the key might exist, its value could be nil! + def validate + needs :project + end + end + end +end diff --git a/lib/banzai/filter/reference_gatherer_filter.rb b/lib/banzai/filter/reference_gatherer_filter.rb new file mode 100644 index 00000000000..855f238ac1e --- /dev/null +++ b/lib/banzai/filter/reference_gatherer_filter.rb @@ -0,0 +1,62 @@ +require 'banzai' +require 'html/pipeline/filter' + +module Banzai + module Filter + # HTML filter that gathers all referenced records that the current user has + # permission to view. + # + # Expected to be run in its own post-processing pipeline. + # + class ReferenceGathererFilter < HTML::Pipeline::Filter + def initialize(*) + super + + result[:references] ||= Hash.new { |hash, type| hash[type] = [] } + end + + def call + doc.css('a.gfm').each do |node| + gather_references(node) + end + + load_lazy_references unless ReferenceExtractor.lazy? + + doc + end + + private + + def gather_references(node) + return unless node.has_attribute?('data-reference-filter') + + reference_type = node.attr('data-reference-filter') + reference_filter = Banzai::Filter.const_get(reference_type) + + return if context[:reference_filter] && reference_filter != context[:reference_filter] + + return unless reference_filter.user_can_reference?(current_user, node, context) + + references = reference_filter.referenced_by(node) + return unless references + + references.each do |type, values| + Array.wrap(values).each do |value| + result[:references][type] << value + end + end + end + + def load_lazy_references + refs = result[:references] + refs.each do |type, values| + refs[type] = ReferenceExtractor.lazily(values) + end + end + + def current_user + context[:current_user] + end + end + end +end diff --git a/lib/banzai/filter/relative_link_filter.rb b/lib/banzai/filter/relative_link_filter.rb new file mode 100644 index 00000000000..5a081125f21 --- /dev/null +++ b/lib/banzai/filter/relative_link_filter.rb @@ -0,0 +1,157 @@ +require 'banzai' +require 'html/pipeline/filter' +require 'uri' + +module Banzai + module Filter + # HTML filter that "fixes" relative links to files in a repository. + # + # Context options: + # :commit + # :project + # :project_wiki + # :ref + # :requested_path + class RelativeLinkFilter < HTML::Pipeline::Filter + def call + return doc unless linkable_files? + + doc.search('a:not(.gfm)').each do |el| + process_link_attr el.attribute('href') + end + + doc.search('img').each do |el| + process_link_attr el.attribute('src') + end + + doc + end + + protected + + def linkable_files? + context[:project_wiki].nil? && repository.try(:exists?) && !repository.empty? + end + + def process_link_attr(html_attr) + return if html_attr.blank? + + uri = URI(html_attr.value) + if uri.relative? && uri.path.present? + html_attr.value = rebuild_relative_uri(uri).to_s + end + rescue URI::Error + # noop + end + + def rebuild_relative_uri(uri) + file_path = relative_file_path(uri.path) + + uri.path = [ + relative_url_root, + context[:project].path_with_namespace, + path_type(file_path), + ref || context[:project].default_branch, # if no ref exists, point to the default branch + file_path + ].compact.join('/').squeeze('/').chomp('/') + + uri + end + + def relative_file_path(path) + nested_path = build_relative_path(path, context[:requested_path]) + file_exists?(nested_path) ? nested_path : path + end + + # Convert a relative path into its correct location based on the currently + # requested path + # + # path - Relative path String + # request_path - Currently-requested path String + # + # Examples: + # + # # File in the same directory as the current path + # build_relative_path("users.md", "doc/api/README.md") + # # => "doc/api/users.md" + # + # # File in the same directory, which is also the current path + # build_relative_path("users.md", "doc/api") + # # => "doc/api/users.md" + # + # # Going up one level to a different directory + # build_relative_path("../update/7.14-to-8.0.md", "doc/api/README.md") + # # => "doc/update/7.14-to-8.0.md" + # + # Returns a String + def build_relative_path(path, request_path) + return request_path if path.empty? + return path unless request_path + + parts = request_path.split('/') + parts.pop if path_type(request_path) != 'tree' + + while parts.length > 1 && path.start_with?('../') + parts.pop + path.sub!('../', '') + end + + parts.push(path).join('/') + end + + def file_exists?(path) + return false if path.nil? + repository.blob_at(current_sha, path).present? || + repository.tree(current_sha, path).entries.any? + end + + # Get the type of the given path + # + # path - String path to check + # + # Examples: + # + # path_type('doc/README.md') # => 'blob' + # path_type('doc/logo.png') # => 'raw' + # path_type('doc/api') # => 'tree' + # + # Returns a String + def path_type(path) + unescaped_path = Addressable::URI.unescape(path) + + if tree?(unescaped_path) + 'tree' + elsif image?(unescaped_path) + 'raw' + else + 'blob' + end + end + + def tree?(path) + repository.tree(current_sha, path).entries.any? + end + + def image?(path) + repository.blob_at(current_sha, path).try(:image?) + end + + def current_sha + context[:commit].try(:id) || + ref ? repository.commit(ref).try(:sha) : repository.head_commit.sha + end + + def relative_url_root + Gitlab.config.gitlab.relative_url_root.presence || '/' + end + + def ref + context[:ref] + end + + def repository + context[:project].try(:repository) + end + end + end +end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb new file mode 100644 index 00000000000..d03e3ae4b3c --- /dev/null +++ b/lib/banzai/filter/sanitization_filter.rb @@ -0,0 +1,99 @@ +require 'banzai' +require 'html/pipeline/filter' +require 'html/pipeline/sanitization_filter' + +module Banzai + module Filter + # Sanitize HTML + # + # Extends HTML::Pipeline::SanitizationFilter with a custom whitelist. + class SanitizationFilter < HTML::Pipeline::SanitizationFilter + def whitelist + # Descriptions are more heavily sanitized, allowing only a few elements. + # See http://git.io/vkuAN + if context[:inline_sanitization] + whitelist = LIMITED + whitelist[:elements] -= %w(pre code img ol ul li) + else + whitelist = super + end + + customize_whitelist(whitelist) + + whitelist + end + + private + + def customized?(transformers) + transformers.last.source_location[0] == __FILE__ + end + + def customize_whitelist(whitelist) + # Only push these customizations once + return if customized?(whitelist[:transformers]) + + # Allow code highlighting + whitelist[:attributes]['pre'] = %w(class) + whitelist[:attributes]['span'] = %w(class) + + # Allow table alignment + whitelist[:attributes]['th'] = %w(style) + whitelist[:attributes]['td'] = %w(style) + + # Allow span elements + whitelist[:elements].push('span') + + # Allow any protocol in `a` elements... + whitelist[:protocols].delete('a') + + # ...but then remove links with the `javascript` protocol + whitelist[:transformers].push(remove_javascript_links) + + # Remove `rel` attribute from `a` elements + whitelist[:transformers].push(remove_rel) + + # Remove `class` attribute from non-highlight spans + whitelist[:transformers].push(clean_spans) + + whitelist + end + + def remove_javascript_links + lambda do |env| + node = env[:node] + + return unless node.name == 'a' + return unless node.has_attribute?('href') + + if node['href'].start_with?('javascript', ':javascript') + node.remove_attribute('href') + end + end + end + + def remove_rel + lambda do |env| + if env[:node_name] == 'a' + env[:node].remove_attribute('rel') + end + end + end + + def clean_spans + lambda do |env| + node = env[:node] + + return unless node.name == 'span' + return unless node.has_attribute?('class') + + unless has_ancestor?(node, 'pre') + node.remove_attribute('class') + end + + { node_whitelist: [node] } + end + end + end + end +end diff --git a/lib/banzai/filter/snippet_reference_filter.rb b/lib/banzai/filter/snippet_reference_filter.rb new file mode 100644 index 00000000000..1ad5df96f85 --- /dev/null +++ b/lib/banzai/filter/snippet_reference_filter.rb @@ -0,0 +1,25 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces snippet references with links. References to + # snippets that do not exist are ignored. + # + # This filter supports cross-project references. + class SnippetReferenceFilter < AbstractReferenceFilter + def self.object_class + Snippet + end + + def find_object(project, id) + project.snippets.find_by(id: id) + end + + def url_for_object(snippet, project) + h = Gitlab::Application.routes.url_helpers + h.namespace_project_snippet_url(project.namespace, project, snippet, + only_path: context[:only_path]) + end + end + end +end diff --git a/lib/banzai/filter/syntax_highlight_filter.rb b/lib/banzai/filter/syntax_highlight_filter.rb new file mode 100644 index 00000000000..c889cc1e97c --- /dev/null +++ b/lib/banzai/filter/syntax_highlight_filter.rb @@ -0,0 +1,45 @@ +require 'banzai' +require 'html/pipeline/filter' +require 'rouge/plugins/redcarpet' + +module Banzai + module Filter + # HTML Filter to highlight fenced code blocks + # + class SyntaxHighlightFilter < HTML::Pipeline::Filter + include Rouge::Plugins::Redcarpet + + def call + doc.search('pre > code').each do |node| + highlight_node(node) + end + + doc + end + + def highlight_node(node) + language = node.attr('class') + code = node.text + + begin + highlighted = block_code(code, language) + rescue + # Gracefully handle syntax highlighter bugs/errors to ensure + # users can still access an issue/comment/etc. + highlighted = "<pre>#{code}</pre>" + end + + # Replace the parent `pre` element with the entire highlighted block + node.parent.replace(highlighted) + end + + private + + # Override Rouge::Plugins::Redcarpet#rouge_formatter + def rouge_formatter(lexer) + Rouge::Formatters::HTMLGitlab.new( + cssclass: "code highlight js-syntax-highlight #{lexer.tag}") + end + end + end +end diff --git a/lib/banzai/filter/table_of_contents_filter.rb b/lib/banzai/filter/table_of_contents_filter.rb new file mode 100644 index 00000000000..92d130074dc --- /dev/null +++ b/lib/banzai/filter/table_of_contents_filter.rb @@ -0,0 +1,63 @@ +require 'banzai' +require 'html/pipeline/filter' + +module Banzai + module Filter + # HTML filter that adds an anchor child element to all Headers in a + # document, so that they can be linked to. + # + # Generates the Table of Contents with links to each header. See Results. + # + # Based on HTML::Pipeline::TableOfContentsFilter. + # + # Context options: + # :no_header_anchors - Skips all processing done by this filter. + # + # Results: + # :toc - String containing Table of Contents data as a `ul` element with + # `li` child elements. + class TableOfContentsFilter < HTML::Pipeline::Filter + PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u + + def call + return doc if context[:no_header_anchors] + + result[:toc] = "" + + headers = Hash.new(0) + + doc.css('h1, h2, h3, h4, h5, h6').each do |node| + text = node.text + + id = text.downcase + id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation + id.gsub!(' ', '-') # replace spaces with dash + id.squeeze!('-') # replace multiple dashes with one + + uniq = (headers[id] > 0) ? "-#{headers[id]}" : '' + headers[id] += 1 + + if header_content = node.children.first + href = "#{id}#{uniq}" + push_toc(href, text) + header_content.add_previous_sibling(anchor_tag(href)) + end + end + + result[:toc] = %Q{<ul class="section-nav">\n#{result[:toc]}</ul>} unless result[:toc].empty? + + doc + end + + private + + def anchor_tag(href) + %Q{<a id="#{href}" class="anchor" href="##{href}" aria-hidden="true"></a>} + end + + def push_toc(href, text) + result[:toc] << %Q{<li><a href="##{href}">#{text}</a></li>\n} + end + end + end +end diff --git a/lib/banzai/filter/task_list_filter.rb b/lib/banzai/filter/task_list_filter.rb new file mode 100644 index 00000000000..bdf7c2ebdfc --- /dev/null +++ b/lib/banzai/filter/task_list_filter.rb @@ -0,0 +1,24 @@ +require 'banzai' +require 'task_list/filter' + +module Banzai + module Filter + # Work around a bug in the default TaskList::Filter that adds a `task-list` + # class to every list element, regardless of whether or not it contains a + # task list. + # + # This is a (hopefully) temporary fix, pending a new release of the + # task_list gem. + # + # See https://github.com/github/task_list/pull/60 + class TaskListFilter < TaskList::Filter + def add_css_class(node, *new_class_names) + if new_class_names.include?('task-list') + super if node.children.any? { |c| c['class'] == 'task-list-item' } + else + super + end + end + end + end +end diff --git a/lib/banzai/filter/upload_link_filter.rb b/lib/banzai/filter/upload_link_filter.rb new file mode 100644 index 00000000000..1a1d0aad8ca --- /dev/null +++ b/lib/banzai/filter/upload_link_filter.rb @@ -0,0 +1,47 @@ +require 'banzai' +require 'html/pipeline/filter' +require 'uri' + +module Banzai + module Filter + # HTML filter that "fixes" relative upload links to files. + # Context options: + # :project (required) - Current project + # + class UploadLinkFilter < HTML::Pipeline::Filter + def call + doc.search('a').each do |el| + process_link_attr el.attribute('href') + end + + doc.search('img').each do |el| + process_link_attr el.attribute('src') + end + + doc + end + + protected + + def process_link_attr(html_attr) + return if html_attr.blank? + + uri = html_attr.value + if uri.starts_with?("/uploads/") + html_attr.value = build_url(uri).to_s + end + end + + def build_url(uri) + File.join(Gitlab.config.gitlab.url, context[:project].path_with_namespace, uri) + end + + # Ensure that a :project key exists in context + # + # Note that while the key might exist, its value could be nil! + def validate + needs :project + end + end + end +end diff --git a/lib/banzai/filter/user_reference_filter.rb b/lib/banzai/filter/user_reference_filter.rb new file mode 100644 index 00000000000..67c24faf991 --- /dev/null +++ b/lib/banzai/filter/user_reference_filter.rb @@ -0,0 +1,129 @@ +require 'banzai' + +module Banzai + module Filter + # HTML filter that replaces user or group references with links. + # + # A special `@all` reference is also supported. + class UserReferenceFilter < ReferenceFilter + # Public: Find `@user` user references in text + # + # UserReferenceFilter.references_in(text) do |match, username| + # "<a href=...>@#{user}</a>" + # end + # + # text - String text to search. + # + # Yields the String match, and the String user name. + # + # Returns a String replaced with the return of the block. + def self.references_in(text) + text.gsub(User.reference_pattern) do |match| + yield match, $~[:user] + end + end + + def self.referenced_by(node) + if node.has_attribute?('data-group') + group = Group.find(node.attr('data-group')) rescue nil + return unless group + + { user: group.users } + elsif node.has_attribute?('data-user') + { user: LazyReference.new(User, node.attr('data-user')) } + elsif node.has_attribute?('data-project') + project = Project.find(node.attr('data-project')) rescue nil + return unless project + + { user: project.team.members.flatten } + end + end + + def self.user_can_reference?(user, node, context) + if node.has_attribute?('data-group') + group = Group.find(node.attr('data-group')) rescue nil + Ability.abilities.allowed?(user, :read_group, group) + else + super + end + end + + def call + replace_text_nodes_matching(User.reference_pattern) do |content| + user_link_filter(content) + end + + replace_link_nodes_with_href(User.reference_pattern) do |link, text| + user_link_filter(link, link_text: text) + end + end + + # Replace `@user` user references in text with links to the referenced + # user's profile page. + # + # text - String text to replace references in. + # + # Returns a String with `@user` references replaced with links. All links + # have `gfm` and `gfm-project_member` class names attached for styling. + def user_link_filter(text, link_text: nil) + self.class.references_in(text) do |match, username| + if username == 'all' + link_to_all(link_text: link_text) + elsif namespace = Namespace.find_by(path: username) + link_to_namespace(namespace, link_text: link_text) || match + else + match + end + end + end + + private + + def urls + Gitlab::Application.routes.url_helpers + end + + def link_class + reference_class(:project_member) + end + + def link_to_all(link_text: nil) + project = context[:project] + url = urls.namespace_project_url(project.namespace, project, + only_path: context[:only_path]) + data = data_attribute(project: project.id) + text = link_text || User.reference_prefix + 'all' + + link_tag(url, data, text) + end + + def link_to_namespace(namespace, link_text: nil) + if namespace.is_a?(Group) + link_to_group(namespace.path, namespace, link_text: link_text) + else + link_to_user(namespace.path, namespace, link_text: link_text) + end + end + + def link_to_group(group, namespace, link_text: nil) + url = urls.group_url(group, only_path: context[:only_path]) + data = data_attribute(group: namespace.id) + text = link_text || Group.reference_prefix + group + + link_tag(url, data, text) + end + + def link_to_user(user, namespace, link_text: nil) + url = urls.user_url(user, only_path: context[:only_path]) + data = data_attribute(user: namespace.owner_id) + text = link_text || User.reference_prefix + user + + link_tag(url, data, text) + end + + def link_tag(url, data, text) + %(<a href="#{url}" #{data} class="#{link_class}">#{text}</a>) + end + end + end +end diff --git a/lib/banzai/lazy_reference.rb b/lib/banzai/lazy_reference.rb new file mode 100644 index 00000000000..073ec5d9801 --- /dev/null +++ b/lib/banzai/lazy_reference.rb @@ -0,0 +1,27 @@ +require 'banzai' + +module Banzai + class LazyReference + def self.load(refs) + lazy_references, values = refs.partition { |ref| ref.is_a?(self) } + + lazy_values = lazy_references.group_by(&:klass).flat_map do |klass, refs| + ids = refs.flat_map(&:ids) + klass.where(id: ids) + end + + values + lazy_values + end + + attr_reader :klass, :ids + + def initialize(klass, ids) + @klass = klass + @ids = Array.wrap(ids).map(&:to_i) + end + + def load + self.klass.where(id: self.ids) + end + end +end diff --git a/lib/banzai/pipeline.rb b/lib/banzai/pipeline.rb new file mode 100644 index 00000000000..4e017809d9d --- /dev/null +++ b/lib/banzai/pipeline.rb @@ -0,0 +1,10 @@ +require 'banzai' + +module Banzai + module Pipeline + def self.[](name) + name ||= :full + const_get("#{name.to_s.camelize}Pipeline") + end + end +end diff --git a/lib/banzai/pipeline/asciidoc_pipeline.rb b/lib/banzai/pipeline/asciidoc_pipeline.rb new file mode 100644 index 00000000000..5e76a817be5 --- /dev/null +++ b/lib/banzai/pipeline/asciidoc_pipeline.rb @@ -0,0 +1,13 @@ +require 'banzai' + +module Banzai + module Pipeline + class AsciidocPipeline < BasePipeline + def self.filters + [ + Filter::RelativeLinkFilter + ] + end + end + end +end diff --git a/lib/banzai/pipeline/atom_pipeline.rb b/lib/banzai/pipeline/atom_pipeline.rb new file mode 100644 index 00000000000..957f352aec5 --- /dev/null +++ b/lib/banzai/pipeline/atom_pipeline.rb @@ -0,0 +1,14 @@ +require 'banzai' + +module Banzai + module Pipeline + class AtomPipeline < FullPipeline + def self.transform_context(context) + super(context).merge( + only_path: false, + xhtml: true + ) + end + end + end +end diff --git a/lib/banzai/pipeline/base_pipeline.rb b/lib/banzai/pipeline/base_pipeline.rb new file mode 100644 index 00000000000..cd30009e5c0 --- /dev/null +++ b/lib/banzai/pipeline/base_pipeline.rb @@ -0,0 +1,30 @@ +require 'banzai' +require 'html/pipeline' + +module Banzai + module Pipeline + class BasePipeline + def self.filters + [] + end + + def self.transform_context(context) + context + end + + def self.html_pipeline + @html_pipeline ||= HTML::Pipeline.new(filters) + end + + class << self + %i(call to_document to_html).each do |meth| + define_method(meth) do |text, context| + context = transform_context(context) + + html_pipeline.send(meth, text, context) + end + end + end + end + end +end diff --git a/lib/banzai/pipeline/combined_pipeline.rb b/lib/banzai/pipeline/combined_pipeline.rb new file mode 100644 index 00000000000..f3bf1809d18 --- /dev/null +++ b/lib/banzai/pipeline/combined_pipeline.rb @@ -0,0 +1,27 @@ +require 'banzai' + +module Banzai + module Pipeline + module CombinedPipeline + def self.new(*pipelines) + Class.new(BasePipeline) do + const_set :PIPELINES, pipelines + + def self.pipelines + self::PIPELINES + end + + def self.filters + pipelines.flat_map(&:filters) + end + + def self.transform_context(context) + pipelines.reduce(context) do |context, pipeline| + pipeline.transform_context(context) + end + end + end + end + end + end +end diff --git a/lib/banzai/pipeline/description_pipeline.rb b/lib/banzai/pipeline/description_pipeline.rb new file mode 100644 index 00000000000..94c2cb165a5 --- /dev/null +++ b/lib/banzai/pipeline/description_pipeline.rb @@ -0,0 +1,14 @@ +require 'banzai' + +module Banzai + module Pipeline + class DescriptionPipeline < FullPipeline + def self.transform_context(context) + super(context).merge( + # SanitizationFilter + inline_sanitization: true + ) + end + end + end +end diff --git a/lib/banzai/pipeline/email_pipeline.rb b/lib/banzai/pipeline/email_pipeline.rb new file mode 100644 index 00000000000..14356145a35 --- /dev/null +++ b/lib/banzai/pipeline/email_pipeline.rb @@ -0,0 +1,13 @@ +require 'banzai' + +module Banzai + module Pipeline + class EmailPipeline < FullPipeline + def self.transform_context(context) + super(context).merge( + only_path: false + ) + end + end + end +end diff --git a/lib/banzai/pipeline/full_pipeline.rb b/lib/banzai/pipeline/full_pipeline.rb new file mode 100644 index 00000000000..72395a5d50e --- /dev/null +++ b/lib/banzai/pipeline/full_pipeline.rb @@ -0,0 +1,9 @@ +require 'banzai' + +module Banzai + module Pipeline + class FullPipeline < CombinedPipeline.new(PlainMarkdownPipeline, GfmPipeline) + + end + end +end diff --git a/lib/banzai/pipeline/gfm_pipeline.rb b/lib/banzai/pipeline/gfm_pipeline.rb new file mode 100644 index 00000000000..38750b55ec7 --- /dev/null +++ b/lib/banzai/pipeline/gfm_pipeline.rb @@ -0,0 +1,41 @@ +require 'banzai' + +module Banzai + module Pipeline + class GfmPipeline < BasePipeline + def self.filters + @filters ||= [ + Filter::SyntaxHighlightFilter, + Filter::SanitizationFilter, + + Filter::UploadLinkFilter, + Filter::EmojiFilter, + Filter::TableOfContentsFilter, + Filter::AutolinkFilter, + Filter::ExternalLinkFilter, + + Filter::UserReferenceFilter, + Filter::IssueReferenceFilter, + Filter::ExternalIssueReferenceFilter, + Filter::MergeRequestReferenceFilter, + Filter::SnippetReferenceFilter, + Filter::CommitRangeReferenceFilter, + Filter::CommitReferenceFilter, + Filter::LabelReferenceFilter, + + Filter::TaskListFilter + ] + end + + def self.transform_context(context) + context.merge( + only_path: true, + + # EmojiFilter + asset_host: Gitlab::Application.config.asset_host, + asset_root: Gitlab.config.gitlab.base_url + ) + end + end + end +end diff --git a/lib/banzai/pipeline/note_pipeline.rb b/lib/banzai/pipeline/note_pipeline.rb new file mode 100644 index 00000000000..89335143852 --- /dev/null +++ b/lib/banzai/pipeline/note_pipeline.rb @@ -0,0 +1,14 @@ +require 'banzai' + +module Banzai + module Pipeline + class NotePipeline < FullPipeline + def self.transform_context(context) + super(context).merge( + # TableOfContentsFilter + no_header_anchors: true + ) + end + end + end +end diff --git a/lib/banzai/pipeline/plain_markdown_pipeline.rb b/lib/banzai/pipeline/plain_markdown_pipeline.rb new file mode 100644 index 00000000000..998fd75daa2 --- /dev/null +++ b/lib/banzai/pipeline/plain_markdown_pipeline.rb @@ -0,0 +1,13 @@ +require 'banzai' + +module Banzai + module Pipeline + class PlainMarkdownPipeline < BasePipeline + def self.filters + [ + Filter::MarkdownFilter + ] + end + end + end +end diff --git a/lib/banzai/pipeline/post_process_pipeline.rb b/lib/banzai/pipeline/post_process_pipeline.rb new file mode 100644 index 00000000000..148f24b6ce1 --- /dev/null +++ b/lib/banzai/pipeline/post_process_pipeline.rb @@ -0,0 +1,20 @@ +require 'banzai' + +module Banzai + module Pipeline + class PostProcessPipeline < BasePipeline + def self.filters + [ + Filter::RelativeLinkFilter, + Filter::RedactorFilter + ] + end + + def self.transform_context(context) + context.merge( + post_process: true + ) + end + end + end +end diff --git a/lib/banzai/pipeline/reference_extraction_pipeline.rb b/lib/banzai/pipeline/reference_extraction_pipeline.rb new file mode 100644 index 00000000000..4f9bc9fcccc --- /dev/null +++ b/lib/banzai/pipeline/reference_extraction_pipeline.rb @@ -0,0 +1,13 @@ +require 'banzai' + +module Banzai + module Pipeline + class ReferenceExtractionPipeline < BasePipeline + def self.filters + [ + Filter::ReferenceGathererFilter + ] + end + end + end +end diff --git a/lib/banzai/pipeline/single_line_pipeline.rb b/lib/banzai/pipeline/single_line_pipeline.rb new file mode 100644 index 00000000000..6725c9039a9 --- /dev/null +++ b/lib/banzai/pipeline/single_line_pipeline.rb @@ -0,0 +1,9 @@ +require 'banzai' + +module Banzai + module Pipeline + class SingleLinePipeline < GfmPipeline + + end + end +end diff --git a/lib/banzai/reference_extractor.rb b/lib/banzai/reference_extractor.rb new file mode 100644 index 00000000000..8ae5b890953 --- /dev/null +++ b/lib/banzai/reference_extractor.rb @@ -0,0 +1,55 @@ +require 'banzai' + +module Banzai + # Extract possible GFM references from an arbitrary String for further processing. + class ReferenceExtractor + class << self + LAZY_KEY = :banzai_reference_extractor_lazy + + def lazy? + Thread.current[LAZY_KEY] + end + + def lazily(values = nil, &block) + return values || block.call if lazy? + + begin + Thread.current[LAZY_KEY] = true + + values ||= block.call + + Banzai::LazyReference.load(values.uniq).uniq + ensure + Thread.current[LAZY_KEY] = false + end + end + end + + def initialize + @texts = [] + end + + def analyze(text, context = {}) + @texts << Renderer.render(text, context) + end + + def references(type, context = {}) + filter = Banzai::Filter["#{type}_reference"] + + context.merge!( + pipeline: :reference_extraction, + + # ReferenceGathererFilter + reference_filter: filter + ) + + self.class.lazily do + @texts.flat_map do |html| + text_context = context.dup + result = Renderer.render_result(html, text_context) + result[:references][type] + end.uniq + end + end + end +end diff --git a/lib/banzai/renderer.rb b/lib/banzai/renderer.rb new file mode 100644 index 00000000000..891c0fd7749 --- /dev/null +++ b/lib/banzai/renderer.rb @@ -0,0 +1,76 @@ +module Banzai + module Renderer + # Convert a Markdown String into an HTML-safe String of HTML + # + # Note that while the returned HTML will have been sanitized of dangerous + # HTML, it may post a risk of information leakage if it's not also passed + # through `post_process`. + # + # Also note that the returned String is always HTML, not XHTML. Views + # requiring XHTML, such as Atom feeds, need to call `post_process` on the + # result, providing the appropriate `pipeline` option. + # + # markdown - Markdown String + # context - Hash of context options passed to our HTML Pipeline + # + # Returns an HTML-safe String + def self.render(text, context = {}) + cache_key = context.delete(:cache_key) + cache_key = full_cache_key(cache_key, context[:pipeline]) + + if cache_key + Rails.cache.fetch(cache_key) do + cacheless_render(text, context) + end + else + cacheless_render(text, context) + end + end + + def self.render_result(text, context = {}) + Pipeline[context[:pipeline]].call(text, context) + end + + # Perform post-processing on an HTML String + # + # This method is used to perform state-dependent changes to a String of + # HTML, such as removing references that the current user doesn't have + # permission to make (`RedactorFilter`). + # + # html - String to process + # context - Hash of options to customize output + # :pipeline - Symbol pipeline type + # :project - Project + # :user - User object + # + # Returns an HTML-safe String + def self.post_process(html, context) + context = Pipeline[context[:pipeline]].transform_context(context) + + pipeline = Pipeline[:post_process] + if context[:xhtml] + pipeline.to_document(html, context).to_html(save_with: Nokogiri::XML::Node::SaveOptions::AS_XHTML) + else + pipeline.to_html(html, context) + end.html_safe + end + + private + + def self.cacheless_render(text, context = {}) + result = render_result(text, context) + + output = result[:output] + if output.respond_to?(:to_html) + output.to_html + else + output.to_s + end + end + + def self.full_cache_key(cache_key, pipeline_name) + return unless cache_key + ["banzai", *cache_key, pipeline_name || :full] + end + end +end |
