summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: Robert Speicher <robert@gitlab.com> 2016-10-07 08:52:26 +0000
committer: Robert Speicher <robert@gitlab.com> 2016-10-07 08:52:26 +0000
commit: a17412f0aa9ef9c99a5c136906ea8a585326c27d (patch)
tree: 4e0d7171e0e9d6f57e151341d8b57328e4376ad1 /lib
parent: c2cf1dd6c771ee3a181fa9704da593daa825c58a (diff)
parent: 110e15da260f96baffb48bebb8cba796374fbb1e (diff)
download: gitlab-ce-a17412f0aa9ef9c99a5c136906ea8a585326c27d.tar.gz
Merge branch '18337-cache-html-in-database' into 'master'
Cache rendered Markdown fields in the database

## What does this MR do?

Introduces cache fields for Markdown-containing fields in the database, and populates them.

## Why was this MR needed?

Rendering Markdown into HTML is performance-intensive. A Redis cache already exists, but this approach is expected to be more performant and reduce unnecessary cache invalidations.

## What are the relevant issue numbers?

Closes #18337

See merge request !6095
Diffstat (limited to 'lib')
-rw-r--r-- lib/banzai.rb 4
-rw-r--r-- lib/banzai/filter/html_entity_filter.rb 12
-rw-r--r-- lib/banzai/note_renderer.rb 5
-rw-r--r-- lib/banzai/object_renderer.rb 53
-rw-r--r-- lib/banzai/pipeline/single_line_pipeline.rb 1
-rw-r--r-- lib/banzai/renderer.rb 28
-rw-r--r-- lib/tasks/cache.rake 43
7 files changed, 98 insertions, 48 deletions
diff --git a/lib/banzai.rb b/lib/banzai.rb
index 9ebe379f454..35ca234c1ba 100644
--- a/lib/banzai.rb
+++ b/lib/banzai.rb
@@ -3,6 +3,10 @@ module Banzai
Renderer.render(text, context)
end
+ def self.render_field(object, field)
+ Renderer.render_field(object, field)
+ end
+
def self.cache_collection_render(texts_and_contexts)
Renderer.cache_collection_render(texts_and_contexts)
end
diff --git a/lib/banzai/filter/html_entity_filter.rb b/lib/banzai/filter/html_entity_filter.rb
new file mode 100644
index 00000000000..4ef8b3b6dcf
--- /dev/null
+++ b/lib/banzai/filter/html_entity_filter.rb
@@ -0,0 +1,12 @@
+require 'erb'
+
+module Banzai
+ module Filter
+ # Text filter that escapes these HTML special characters: & " < > '
+ class HTMLEntityFilter < HTML::Pipeline::TextFilter
+ def call
+ ERB::Util.html_escape(text)
+ end
+ end
+ end
+end
diff --git a/lib/banzai/note_renderer.rb b/lib/banzai/note_renderer.rb
index bab6a9934d1..2b7c10f1a0e 100644
--- a/lib/banzai/note_renderer.rb
+++ b/lib/banzai/note_renderer.rb
@@ -3,7 +3,7 @@ module Banzai
# Renders a collection of Note instances.
#
# notes - The notes to render.
- # project - The project to use for rendering/redacting.
+ # project - The project to use for redacting.
# user - The user viewing the notes.
# path - The request path.
# wiki - The project's wiki.
@@ -13,8 +13,7 @@ module Banzai
user,
requested_path: path,
project_wiki: wiki,
- ref: git_ref,
- pipeline: :note)
+ ref: git_ref)
renderer.render(notes, :note)
end
diff --git a/lib/banzai/object_renderer.rb b/lib/banzai/object_renderer.rb
index 9aef807c152..9f8eb0931b8 100644
--- a/lib/banzai/object_renderer.rb
+++ b/lib/banzai/object_renderer.rb
@@ -1,28 +1,32 @@
module Banzai
- # Class for rendering multiple objects (e.g. Note instances) in a single pass.
+ # Class for rendering multiple objects (e.g. Note instances) in a single pass,
+ # using +render_field+ to benefit from caching in the database. Rendering and
+ # redaction are both performed.
#
- # Rendered Markdown is stored in an attribute in every object based on the
- # name of the attribute containing the Markdown. For example, when the
- # attribute `note` is rendered the HTML is stored in `note_html`.
+ # The unredacted HTML is generated according to the usual +render_field+
+ # policy, so specify the pipeline and any other context options on the model.
+ #
+ # The *redacted* (i.e., suitable for use) HTML is placed in an attribute
+ # named "redacted_<foo>", where <foo> is the name of the cache field for the
+ # chosen attribute.
+ #
+ # As an example, rendering the attribute `note` would place the unredacted
+ # HTML into `note_html` and the redacted HTML into `redacted_note_html`.
class ObjectRenderer
attr_reader :project, :user
- # Make sure to set the appropriate pipeline in the `raw_context` attribute
- # (e.g. `:note` for Note instances).
- #
- # project - A Project to use for rendering and redacting Markdown.
+ # project - A Project to use for redacting Markdown.
# user - The user viewing the Markdown/HTML documents, if any.
- # context - A Hash containing extra attributes to use in the rendering
- # pipeline.
- def initialize(project, user = nil, raw_context = {})
+ # redaction_context - A Hash containing extra attributes to use during redaction.
+ def initialize(project, user = nil, redaction_context = {})
@project = project
@user = user
- @raw_context = raw_context
+ @redaction_context = redaction_context
end
# Renders and redacts an Array of objects.
#
- # objects - The objects to render
+ # objects - The objects to render.
# attribute - The attribute containing the raw Markdown to render.
#
# Returns the same input objects.
@@ -32,7 +36,7 @@ module Banzai
objects.each_with_index do |object, index|
redacted_data = redacted[index]
- object.__send__("#{attribute}_html=", redacted_data[:document].to_html.html_safe)
+ object.__send__("redacted_#{attribute}_html=", redacted_data[:document].to_html.html_safe)
object.user_visible_reference_count = redacted_data[:visible_reference_count]
end
end
@@ -53,12 +57,8 @@ module Banzai
# Returns a Banzai context for the given object and attribute.
def context_for(object, attribute)
- context = base_context.merge(cache_key: [object, attribute])
-
- if object.respond_to?(:author)
- context[:author] = object.author
- end
-
+ context = base_context.dup
+ context = context.merge(object.banzai_render_context(attribute))
context
end
@@ -66,21 +66,16 @@ module Banzai
#
# Returns an Array of `Nokogiri::HTML::Document`.
def render_attributes(objects, attribute)
- strings_and_contexts = objects.map do |object|
+ objects.map do |object|
+ string = Banzai.render_field(object, attribute)
context = context_for(object, attribute)
- string = object.__send__(attribute)
-
- { text: string, context: context }
- end
-
- Banzai.cache_collection_render(strings_and_contexts).each_with_index.map do |html, index|
- Banzai::Pipeline[:relative_link].to_document(html, strings_and_contexts[index][:context])
+ Banzai::Pipeline[:relative_link].to_document(string, context)
end
end
def base_context
- @base_context ||= @raw_context.merge(current_user: user, project: project)
+ @base_context ||= @redaction_context.merge(current_user: user, project: project)
end
end
end
diff --git a/lib/banzai/pipeline/single_line_pipeline.rb b/lib/banzai/pipeline/single_line_pipeline.rb
index ba2555df98d..30bc035d085 100644
--- a/lib/banzai/pipeline/single_line_pipeline.rb
+++ b/lib/banzai/pipeline/single_line_pipeline.rb
@@ -3,6 +3,7 @@ module Banzai
class SingleLinePipeline < GfmPipeline
def self.filters
@filters ||= FilterArray[
+ Filter::HTMLEntityFilter,
Filter::SanitizationFilter,
Filter::EmojiFilter,
diff --git a/lib/banzai/renderer.rb b/lib/banzai/renderer.rb
index a4ae27eefd8..6924a293da8 100644
--- a/lib/banzai/renderer.rb
+++ b/lib/banzai/renderer.rb
@@ -31,6 +31,34 @@ module Banzai
end
end
+ # Convert a Markdown-containing field on an object into an HTML-safe String
+ # of HTML. This method is analogous to calling render(object.field), but it
+ # can cache the rendered HTML in the object, rather than Redis.
+ #
+ # The context to use is learned from the passed-in object by calling
+ # #banzai_render_context(field), and cannot be changed. Use #render, passing
+ # it the field text, if a custom rendering is needed. Returns only the HTML;
+ # the context itself is not returned.
+ def render_field(object, field)
+ html_field = object.markdown_cache_field_for(field)
+
+ html = object.__send__(html_field)
+ return html if html.present?
+
+ html = cacheless_render_field(object, field)
+ object.update_column(html_field, html) unless object.new_record? || object.destroyed?
+
+ html
+ end
+
+ # Same as +render_field+, but without consulting or updating the cache field
+ def cacheless_render_field(object, field)
+ text = object.__send__(field)
+ context = object.banzai_render_context(field)
+
+ cacheless_render(text, context)
+ end
+
# Perform multiple render from an Array of Markdown String into an
# Array of HTML-safe String of HTML.
#
diff --git a/lib/tasks/cache.rake b/lib/tasks/cache.rake
index 2214f855200..a95a3455a4a 100644
--- a/lib/tasks/cache.rake
+++ b/lib/tasks/cache.rake
@@ -1,22 +1,33 @@
namespace :cache do
- CLEAR_BATCH_SIZE = 1000 # There seems to be no speedup when pushing beyond 1,000
- REDIS_SCAN_START_STOP = '0' # Magic value, see http://redis.io/commands/scan
+ namespace :clear do
+ REDIS_CLEAR_BATCH_SIZE = 1000 # There seems to be no speedup when pushing beyond 1,000
+ REDIS_SCAN_START_STOP = '0' # Magic value, see http://redis.io/commands/scan
- desc "GitLab | Clear redis cache"
- task :clear => :environment do
- Gitlab::Redis.with do |redis|
- cursor = REDIS_SCAN_START_STOP
- loop do
- cursor, keys = redis.scan(
- cursor,
- match: "#{Gitlab::Redis::CACHE_NAMESPACE}*",
- count: CLEAR_BATCH_SIZE
- )
-
- redis.del(*keys) if keys.any?
-
- break if cursor == REDIS_SCAN_START_STOP
+ desc "GitLab | Clear redis cache"
+ task redis: :environment do
+ Gitlab::Redis.with do |redis|
+ cursor = REDIS_SCAN_START_STOP
+ loop do
+ cursor, keys = redis.scan(
+ cursor,
+ match: "#{Gitlab::Redis::CACHE_NAMESPACE}*",
+ count: REDIS_CLEAR_BATCH_SIZE
+ )
+
+ redis.del(*keys) if keys.any?
+
+ break if cursor == REDIS_SCAN_START_STOP
+ end
end
end
+
+ desc "GitLab | Clear database cache (in the background)"
+ task db: :environment do
+ ClearDatabaseCacheWorker.perform_async
+ end
+
+ task all: [:db, :redis]
end
+
+ task clear: 'cache:clear:all'
end