diff options
| author | Robert Speicher <robert@gitlab.com> | 2016-10-07 08:52:26 +0000 |
|---|---|---|
| committer | Robert Speicher <robert@gitlab.com> | 2016-10-07 08:52:26 +0000 |
| commit | a17412f0aa9ef9c99a5c136906ea8a585326c27d (patch) | |
| tree | 4e0d7171e0e9d6f57e151341d8b57328e4376ad1 /lib | |
| parent | c2cf1dd6c771ee3a181fa9704da593daa825c58a (diff) | |
| parent | 110e15da260f96baffb48bebb8cba796374fbb1e (diff) | |
| download | gitlab-ce-a17412f0aa9ef9c99a5c136906ea8a585326c27d.tar.gz | |
Merge branch '18337-cache-html-in-database' into 'master'
Cache rendered Markdown fields in the database
## What does this MR do?
Introduces cache fields for Markdown-containing fields in the database, and populates them.
## Why was this MR needed?
Rendering Markdown into HTML is performance-intensive. A Redis cache already exists, but this approach is expected to be more performant and reduce unnecessary cache invalidations.
## What are the relevant issue numbers?
Closes #18337
See merge request !6095
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/banzai.rb | 4 | ||||
| -rw-r--r-- | lib/banzai/filter/html_entity_filter.rb | 12 | ||||
| -rw-r--r-- | lib/banzai/note_renderer.rb | 5 | ||||
| -rw-r--r-- | lib/banzai/object_renderer.rb | 53 | ||||
| -rw-r--r-- | lib/banzai/pipeline/single_line_pipeline.rb | 1 | ||||
| -rw-r--r-- | lib/banzai/renderer.rb | 28 | ||||
| -rw-r--r-- | lib/tasks/cache.rake | 43 |
7 files changed, 98 insertions, 48 deletions
diff --git a/lib/banzai.rb b/lib/banzai.rb index 9ebe379f454..35ca234c1ba 100644 --- a/lib/banzai.rb +++ b/lib/banzai.rb @@ -3,6 +3,10 @@ module Banzai Renderer.render(text, context) end + def self.render_field(object, field) + Renderer.render_field(object, field) + end + def self.cache_collection_render(texts_and_contexts) Renderer.cache_collection_render(texts_and_contexts) end diff --git a/lib/banzai/filter/html_entity_filter.rb b/lib/banzai/filter/html_entity_filter.rb new file mode 100644 index 00000000000..4ef8b3b6dcf --- /dev/null +++ b/lib/banzai/filter/html_entity_filter.rb @@ -0,0 +1,12 @@ +require 'erb' + +module Banzai + module Filter + # Text filter that escapes these HTML entities: & " < > + class HTMLEntityFilter < HTML::Pipeline::TextFilter + def call + ERB::Util.html_escape(text) + end + end + end +end diff --git a/lib/banzai/note_renderer.rb b/lib/banzai/note_renderer.rb index bab6a9934d1..2b7c10f1a0e 100644 --- a/lib/banzai/note_renderer.rb +++ b/lib/banzai/note_renderer.rb @@ -3,7 +3,7 @@ module Banzai # Renders a collection of Note instances. # # notes - The notes to render. - # project - The project to use for rendering/redacting. + # project - The project to use for redacting. # user - The user viewing the notes. # path - The request path. # wiki - The project's wiki. @@ -13,8 +13,7 @@ module Banzai user, requested_path: path, project_wiki: wiki, - ref: git_ref, - pipeline: :note) + ref: git_ref) renderer.render(notes, :note) end diff --git a/lib/banzai/object_renderer.rb b/lib/banzai/object_renderer.rb index 9aef807c152..9f8eb0931b8 100644 --- a/lib/banzai/object_renderer.rb +++ b/lib/banzai/object_renderer.rb @@ -1,28 +1,32 @@ module Banzai - # Class for rendering multiple objects (e.g. Note instances) in a single pass. + # Class for rendering multiple objects (e.g. Note instances) in a single pass, + # using +render_field+ to benefit from caching in the database. Rendering and + # redaction are both performed. # - # Rendered Markdown is stored in an attribute in every object based on the - # name of the attribute containing the Markdown. For example, when the - # attribute `note` is rendered the HTML is stored in `note_html`. + # The unredacted HTML is generated according to the usual +render_field+ + # policy, so specify the pipeline and any other context options on the model. + # + # The *redacted* (i.e., suitable for use) HTML is placed in an attribute + # named "redacted_<foo>", where <foo> is the name of the cache field for the + # chosen attribute. + # + # As an example, rendering the attribute `note` would place the unredacted + # HTML into `note_html` and the redacted HTML into `redacted_note_html`. class ObjectRenderer attr_reader :project, :user - # Make sure to set the appropriate pipeline in the `raw_context` attribute - # (e.g. `:note` for Note instances). - # - # project - A Project to use for rendering and redacting Markdown. + # project - A Project to use for redacting Markdown. # user - The user viewing the Markdown/HTML documents, if any. - # context - A Hash containing extra attributes to use in the rendering - # pipeline. - def initialize(project, user = nil, raw_context = {}) + # context - A Hash containing extra attributes to use during redaction + def initialize(project, user = nil, redaction_context = {}) @project = project @user = user - @raw_context = raw_context + @redaction_context = redaction_context end # Renders and redacts an Array of objects. # - # objects - The objects to render + # objects - The objects to render. # attribute - The attribute containing the raw Markdown to render. # # Returns the same input objects. @@ -32,7 +36,7 @@ module Banzai objects.each_with_index do |object, index| redacted_data = redacted[index] - object.__send__("#{attribute}_html=", redacted_data[:document].to_html.html_safe) + object.__send__("redacted_#{attribute}_html=", redacted_data[:document].to_html.html_safe) object.user_visible_reference_count = redacted_data[:visible_reference_count] end end @@ -53,12 +57,8 @@ module Banzai # Returns a Banzai context for the given object and attribute. def context_for(object, attribute) - context = base_context.merge(cache_key: [object, attribute]) - - if object.respond_to?(:author) - context[:author] = object.author - end - + context = base_context.dup + context = context.merge(object.banzai_render_context(attribute)) context end @@ -66,21 +66,16 @@ module Banzai # # Returns an Array of `Nokogiri::HTML::Document`. def render_attributes(objects, attribute) - strings_and_contexts = objects.map do |object| + objects.map do |object| + string = Banzai.render_field(object, attribute) context = context_for(object, attribute) - string = object.__send__(attribute) - - { text: string, context: context } - end - - Banzai.cache_collection_render(strings_and_contexts).each_with_index.map do |html, index| - Banzai::Pipeline[:relative_link].to_document(html, strings_and_contexts[index][:context]) + Banzai::Pipeline[:relative_link].to_document(string, context) end end def base_context - @base_context ||= @raw_context.merge(current_user: user, project: project) + @base_context ||= @redaction_context.merge(current_user: user, project: project) end end end diff --git a/lib/banzai/pipeline/single_line_pipeline.rb b/lib/banzai/pipeline/single_line_pipeline.rb index ba2555df98d..30bc035d085 100644 --- a/lib/banzai/pipeline/single_line_pipeline.rb +++ b/lib/banzai/pipeline/single_line_pipeline.rb @@ -3,6 +3,7 @@ module Banzai class SingleLinePipeline < GfmPipeline def self.filters @filters ||= FilterArray[ + Filter::HTMLEntityFilter, Filter::SanitizationFilter, Filter::EmojiFilter, diff --git a/lib/banzai/renderer.rb b/lib/banzai/renderer.rb index a4ae27eefd8..6924a293da8 100644 --- a/lib/banzai/renderer.rb +++ b/lib/banzai/renderer.rb @@ -31,6 +31,34 @@ module Banzai end end + # Convert a Markdown-containing field on an object into an HTML-safe String + # of HTML. This method is analogous to calling render(object.field), but it + # can cache the rendered HTML in the object, rather than Redis. + # + # The context to use is learned from the passed-in object by calling + # #banzai_render_context(field), and cannot be changed. Use #render, passing + # it the field text, if a custom rendering is needed. The generated context + # is returned along with the HTML. + def render_field(object, field) + html_field = object.markdown_cache_field_for(field) + + html = object.__send__(html_field) + return html if html.present? + + html = cacheless_render_field(object, field) + object.update_column(html_field, html) unless object.new_record? || object.destroyed? + + html + end + + # Same as +render_field+, but without consulting or updating the cache field + def cacheless_render_field(object, field) + text = object.__send__(field) + context = object.banzai_render_context(field) + + cacheless_render(text, context) + end + # Perform multiple render from an Array of Markdown String into an # Array of HTML-safe String of HTML. # diff --git a/lib/tasks/cache.rake b/lib/tasks/cache.rake index 2214f855200..a95a3455a4a 100644 --- a/lib/tasks/cache.rake +++ b/lib/tasks/cache.rake @@ -1,22 +1,33 @@ namespace :cache do - CLEAR_BATCH_SIZE = 1000 # There seems to be no speedup when pushing beyond 1,000 - REDIS_SCAN_START_STOP = '0' # Magic value, see http://redis.io/commands/scan + namespace :clear do + REDIS_CLEAR_BATCH_SIZE = 1000 # There seems to be no speedup when pushing beyond 1,000 + REDIS_SCAN_START_STOP = '0' # Magic value, see http://redis.io/commands/scan - desc "GitLab | Clear redis cache" - task :clear => :environment do - Gitlab::Redis.with do |redis| - cursor = REDIS_SCAN_START_STOP - loop do - cursor, keys = redis.scan( - cursor, - match: "#{Gitlab::Redis::CACHE_NAMESPACE}*", - count: CLEAR_BATCH_SIZE - ) - - redis.del(*keys) if keys.any? - - break if cursor == REDIS_SCAN_START_STOP + desc "GitLab | Clear redis cache" + task redis: :environment do + Gitlab::Redis.with do |redis| + cursor = REDIS_SCAN_START_STOP + loop do + cursor, keys = redis.scan( + cursor, + match: "#{Gitlab::Redis::CACHE_NAMESPACE}*", + count: REDIS_CLEAR_BATCH_SIZE + ) + + redis.del(*keys) if keys.any? + + break if cursor == REDIS_SCAN_START_STOP + end end end + + desc "GitLab | Clear database cache (in the background)" + task db: :environment do + ClearDatabaseCacheWorker.perform_async + end + + task all: [:db, :redis] end + + task clear: 'cache:clear:all' end |
