diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/banzai/querying.rb | 56 | ||||
| -rw-r--r-- | lib/banzai/reference_extractor.rb | 5 | ||||
| -rw-r--r-- | lib/banzai/reference_parser/base_parser.rb | 5 | ||||
| -rw-r--r-- | lib/banzai/reference_parser/directly_addressed_user_parser.rb | 8 | ||||
| -rw-r--r-- | lib/gitlab/reference_extractor.rb | 8 | 
5 files changed, 73 insertions, 9 deletions
| diff --git a/lib/banzai/querying.rb b/lib/banzai/querying.rb index 1e1b51e683e..fb2faae02bc 100644 --- a/lib/banzai/querying.rb +++ b/lib/banzai/querying.rb @@ -1,18 +1,64 @@  module Banzai    module Querying +    module_function +      # Searches a Nokogiri document using a CSS query, optionally optimizing it      # whenever possible.      # -    # document - A document/element to search. -    # query    - The CSS query to use. +    # document          - A document/element to search. +    # query             - The CSS query to use. +    # reference_options - A hash with nodes filter options      # -    # Returns a Nokogiri::XML::NodeSet. -    def self.css(document, query) +    # Returns an array of Nokogiri::XML::Element objects if location is specified +    # in reference_options. Otherwise it would a Nokogiri::XML::NodeSet. +    def css(document, query, reference_options = {})        # When using "a.foo" Nokogiri compiles this to "//a[...]" but        # "descendant::a[...]" is quite a bit faster and achieves the same result.        xpath = Nokogiri::CSS.xpath_for(query)[0].gsub(%r{^//}, 'descendant::') +      xpath = restrict_to_p_nodes_at_root(xpath) if filter_nodes_at_beginning?(reference_options) +      nodes = document.xpath(xpath) + +      filter_nodes(nodes, reference_options) +    end + +    def restrict_to_p_nodes_at_root(xpath) +      xpath.gsub('descendant::', './p/') +    end + +    def filter_nodes(nodes, reference_options) +      if filter_nodes_at_beginning?(reference_options) +        filter_nodes_at_beginning(nodes) +      else +        nodes +      end +    end + +    def filter_nodes_at_beginning?(reference_options) +      reference_options && reference_options[:location] == :beginning +    end + +    # Selects child nodes if they are present in the beginning among other siblings. +    # +    # nodes - A Nokogiri::XML::NodeSet. +    # +    # Returns an array of Nokogiri::XML::Element objects. +    def filter_nodes_at_beginning(nodes) +      parents_and_nodes = nodes.group_by(&:parent) +      filtered_nodes = [] + +      parents_and_nodes.each do |parent, nodes| +        children = parent.children +        nodes    = nodes.to_a + +        children.each do |child| +          next if child.text.blank? +          node = nodes.shift +          break unless node == child +          filtered_nodes << node +        end +      end -      document.xpath(xpath) +      filtered_nodes      end    end  end diff --git a/lib/banzai/reference_extractor.rb b/lib/banzai/reference_extractor.rb index b26a41a1f3b..8e3b0c4db79 100644 --- a/lib/banzai/reference_extractor.rb +++ b/lib/banzai/reference_extractor.rb @@ -16,6 +16,11 @@ module Banzai        processor.process(html_documents)      end +    def reset_memoized_values +      @html_documents     = nil +      @texts_and_contexts = [] +    end +      private      def html_documents diff --git a/lib/banzai/reference_parser/base_parser.rb b/lib/banzai/reference_parser/base_parser.rb index d8a855ec1fe..2058a58d0ae 100644 --- a/lib/banzai/reference_parser/base_parser.rb +++ b/lib/banzai/reference_parser/base_parser.rb @@ -33,7 +33,7 @@ module Banzai      # they have access to.      class BaseParser        class << self -        attr_accessor :reference_type +        attr_accessor :reference_type, :reference_options        end        # Returns the attribute name containing the value for every object to be @@ -182,9 +182,10 @@ module Banzai        # the references.        def process(documents)          type = self.class.reference_type +        reference_options = self.class.reference_options          nodes = documents.flat_map do |document| -          Querying.css(document, "a[data-reference-type='#{type}'].gfm").to_a +          Querying.css(document, "a[data-reference-type='#{type}'].gfm", reference_options).to_a          end          gather_references(nodes) diff --git a/lib/banzai/reference_parser/directly_addressed_user_parser.rb b/lib/banzai/reference_parser/directly_addressed_user_parser.rb new file mode 100644 index 00000000000..77df9bbd024 --- /dev/null +++ b/lib/banzai/reference_parser/directly_addressed_user_parser.rb @@ -0,0 +1,8 @@ +module Banzai +  module ReferenceParser +    class DirectlyAddressedUserParser < UserParser +      self.reference_type = :user +      self.reference_options = { location: :beginning } +    end +  end +end diff --git a/lib/gitlab/reference_extractor.rb b/lib/gitlab/reference_extractor.rb index 11c0b01f0dc..437a339dd2b 100644 --- a/lib/gitlab/reference_extractor.rb +++ b/lib/gitlab/reference_extractor.rb @@ -1,13 +1,12 @@  module Gitlab    # Extract possible GFM references from an arbitrary String for further processing.    class ReferenceExtractor < Banzai::ReferenceExtractor -    REFERABLES = %i(user issue label milestone merge_request snippet commit commit_range) +    REFERABLES = %i(user issue label milestone merge_request snippet commit commit_range directly_addressed_user)      attr_accessor :project, :current_user, :author      def initialize(project, current_user = nil)        @project = project        @current_user = current_user -        @references = {}        super() @@ -21,6 +20,11 @@ module Gitlab        super(type, project, current_user)      end +    def reset_memoized_values +      @references = {} +      super() +    end +      REFERABLES.each do |type|        define_method("#{type}s") do          @references[type] ||= references(type) | 
