diff options
author | Sean McGivern <sean@mcgivern.me.uk> | 2017-11-29 09:10:43 +0000 |
---|---|---|
committer | Sean McGivern <sean@mcgivern.me.uk> | 2017-11-29 09:10:43 +0000 |
commit | a4f8dddc212fcd91f6a4a09e92b2de6117a21305 (patch) | |
tree | eed31a76289f865cdb1d392c6bad3f2bf96ebbc4 | |
parent | 3659327d71a033703451baba01873d5663755739 (diff) | |
parent | da42dfb3cf4a2fb0cdcc1a3b41438516a0bed0e5 (diff) | |
download | gitlab-ce-a4f8dddc212fcd91f6a4a09e92b2de6117a21305.tar.gz |
Merge branch 'dm-search-pattern' into 'master'
Use fuzzy search with minimum length of 3 characters where appropriate
Closes #40512
See merge request gitlab-org/gitlab-ce!15592
-rw-r--r-- | app/finders/notes_finder.rb | 3 | ||||
-rw-r--r-- | app/models/ci/runner.rb | 6 | ||||
-rw-r--r-- | app/models/concerns/issuable.rb | 9 | ||||
-rw-r--r-- | app/models/email.rb | 1 | ||||
-rw-r--r-- | app/models/group.rb | 14 | ||||
-rw-r--r-- | app/models/milestone.rb | 6 | ||||
-rw-r--r-- | app/models/namespace.rb | 6 | ||||
-rw-r--r-- | app/models/note.rb | 5 | ||||
-rw-r--r-- | app/models/project.rb | 10 | ||||
-rw-r--r-- | app/models/snippet.rb | 11 | ||||
-rw-r--r-- | app/models/user.rb | 24 | ||||
-rw-r--r-- | changelogs/unreleased/dm-search-pattern.yml | 5 | ||||
-rw-r--r-- | lib/gitlab/sql/pattern.rb | 25 | ||||
-rw-r--r-- | spec/lib/gitlab/sql/pattern_spec.rb | 30 | ||||
-rw-r--r-- | spec/models/ci/runner_spec.rb | 2 | ||||
-rw-r--r-- | spec/models/concerns/issuable_spec.rb | 10 | ||||
-rw-r--r-- | spec/models/snippet_spec.rb | 2 |
17 files changed, 84 insertions, 85 deletions
diff --git a/app/finders/notes_finder.rb b/app/finders/notes_finder.rb index 02eb983bf55..12157818bcd 100644 --- a/app/finders/notes_finder.rb +++ b/app/finders/notes_finder.rb @@ -104,8 +104,7 @@ class NotesFinder query = @params[:search] return notes unless query - pattern = "%#{query}%" - notes.where(Note.arel_table[:note].matches(pattern)) + notes.search(query) end # Notes changed since last fetch diff --git a/app/models/ci/runner.rb b/app/models/ci/runner.rb index c6509f89117..d39610a8995 100644 --- a/app/models/ci/runner.rb +++ b/app/models/ci/runner.rb @@ -1,6 +1,7 @@ module Ci class Runner < ActiveRecord::Base extend Gitlab::Ci::Model + include Gitlab::SQL::Pattern RUNNER_QUEUE_EXPIRY_TIME = 60.minutes ONLINE_CONTACT_TIMEOUT = 1.hour @@ -59,10 +60,7 @@ module Ci # # Returns an ActiveRecord::Relation. def self.search(query) - t = arel_table - pattern = "%#{query}%" - - where(t[:token].matches(pattern).or(t[:description].matches(pattern))) + fuzzy_search(query, [:token, :description]) end def self.contact_time_deadline diff --git a/app/models/concerns/issuable.rb b/app/models/concerns/issuable.rb index 27cd3118f81..5ca4a7086cb 100644 --- a/app/models/concerns/issuable.rb +++ b/app/models/concerns/issuable.rb @@ -122,9 +122,7 @@ module Issuable # # Returns an ActiveRecord::Relation. def search(query) - title = to_fuzzy_arel(:title, query) - - where(title) + fuzzy_search(query, [:title]) end # Searches for records with a matching title or description. @@ -135,10 +133,7 @@ module Issuable # # Returns an ActiveRecord::Relation. def full_search(query) - title = to_fuzzy_arel(:title, query) - description = to_fuzzy_arel(:description, query) - - where(title&.or(description)) + fuzzy_search(query, [:title, :description]) end def sort(method, excluded_labels: []) diff --git a/app/models/email.rb b/app/models/email.rb index 2da8b050149..d6516761f0a 100644 --- a/app/models/email.rb +++ b/app/models/email.rb @@ -1,5 +1,6 @@ class Email < ActiveRecord::Base include Sortable + include Gitlab::SQL::Pattern belongs_to :user diff --git a/app/models/group.rb b/app/models/group.rb index dc4500360b9..76262acf50c 100644 --- a/app/models/group.rb +++ b/app/models/group.rb @@ -50,20 +50,6 @@ class Group < Namespace Gitlab::Database.postgresql? end - # Searches for groups matching the given query. - # - # This method uses ILIKE on PostgreSQL and LIKE on MySQL. - # - # query - The search query as a String - # - # Returns an ActiveRecord::Relation. - def search(query) - table = Namespace.arel_table - pattern = "%#{query}%" - - where(table[:name].matches(pattern).or(table[:path].matches(pattern))) - end - def sort(method) if method == 'storage_size_desc' # storage_size is a virtual column so we need to diff --git a/app/models/milestone.rb b/app/models/milestone.rb index 01458120cda..c06ee8083f0 100644 --- a/app/models/milestone.rb +++ b/app/models/milestone.rb @@ -13,6 +13,7 @@ class Milestone < ActiveRecord::Base include Referable include StripAttribute include Milestoneish + include Gitlab::SQL::Pattern cache_markdown_field :title, pipeline: :single_line cache_markdown_field :description @@ -73,10 +74,7 @@ class Milestone < ActiveRecord::Base # # Returns an ActiveRecord::Relation. def search(query) - t = arel_table - pattern = "%#{query}%" - - where(t[:title].matches(pattern).or(t[:description].matches(pattern))) + fuzzy_search(query, [:title, :description]) end def filter_by_state(milestones, state) diff --git a/app/models/namespace.rb b/app/models/namespace.rb index 4d401e7ba18..fa76729a702 100644 --- a/app/models/namespace.rb +++ b/app/models/namespace.rb @@ -9,6 +9,7 @@ class Namespace < ActiveRecord::Base include Routable include AfterCommitQueue include Storage::LegacyNamespace + include Gitlab::SQL::Pattern # Prevent users from creating unreasonably deep level of nesting. # The number 20 was taken based on maximum nesting level of @@ -86,10 +87,7 @@ class Namespace < ActiveRecord::Base # # Returns an ActiveRecord::Relation def search(query) - t = arel_table - pattern = "%#{query}%" - - where(t[:name].matches(pattern).or(t[:path].matches(pattern))) + fuzzy_search(query, [:name, :path]) end def clean_path(path) diff --git a/app/models/note.rb b/app/models/note.rb index 50c9caf8529..340fe087f82 100644 --- a/app/models/note.rb +++ b/app/models/note.rb @@ -14,6 +14,7 @@ class Note < ActiveRecord::Base include ResolvableNote include IgnorableColumn include Editable + include Gitlab::SQL::Pattern module SpecialRole FIRST_TIME_CONTRIBUTOR = :first_time_contributor @@ -167,6 +168,10 @@ class Note < ActiveRecord::Base def has_special_role?(role, note) note.special_role == role end + + def search(query) + fuzzy_search(query, [:note]) + end end def cross_reference? diff --git a/app/models/project.rb b/app/models/project.rb index 85d580fe0fa..5a3f591c2e7 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -426,17 +426,11 @@ class Project < ActiveRecord::Base # # query - The search query as a String. def search(query) - pattern = to_pattern(query) - - where( - arel_table[:path].matches(pattern) - .or(arel_table[:name].matches(pattern)) - .or(arel_table[:description].matches(pattern)) - ) + fuzzy_search(query, [:path, :name, :description]) end def search_by_title(query) - non_archived.where(arel_table[:name].matches(to_pattern(query))) + non_archived.fuzzy_search(query, [:name]) end def visibility_levels diff --git a/app/models/snippet.rb b/app/models/snippet.rb index 2a5f07a15c4..05a16f11b59 100644 --- a/app/models/snippet.rb +++ b/app/models/snippet.rb @@ -9,6 +9,7 @@ class Snippet < ActiveRecord::Base include Mentionable include Spammable include Editable + include Gitlab::SQL::Pattern extend Gitlab::CurrentSettings @@ -135,10 +136,7 @@ class Snippet < ActiveRecord::Base # # Returns an ActiveRecord::Relation. def search(query) - t = arel_table - pattern = "%#{query}%" - - where(t[:title].matches(pattern).or(t[:file_name].matches(pattern))) + fuzzy_search(query, [:title, :file_name]) end # Searches for snippets with matching content. @@ -149,10 +147,7 @@ class Snippet < ActiveRecord::Base # # Returns an ActiveRecord::Relation. def search_code(query) - table = Snippet.arel_table - pattern = "%#{query}%" - - where(table[:content].matches(pattern)) + fuzzy_search(query, [:content]) end end end diff --git a/app/models/user.rb b/app/models/user.rb index cf6b36559a8..14941fd7f98 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -313,9 +313,6 @@ class User < ActiveRecord::Base # # Returns an ActiveRecord::Relation. def search(query) - table = arel_table - pattern = User.to_pattern(query) - order = <<~SQL CASE WHEN users.name = %{query} THEN 0 @@ -325,11 +322,8 @@ class User < ActiveRecord::Base END SQL - where( - table[:name].matches(pattern) - .or(table[:email].matches(pattern)) - .or(table[:username].matches(pattern)) - ).reorder(order % { query: ActiveRecord::Base.connection.quote(query) }, :name) + fuzzy_search(query, [:name, :email, :username]) + .reorder(order % { query: ActiveRecord::Base.connection.quote(query) }, :name) end # searches user by given pattern @@ -337,16 +331,16 @@ class User < ActiveRecord::Base # This method uses ILIKE on PostgreSQL and LIKE on MySQL. def search_with_secondary_emails(query) - table = arel_table email_table = Email.arel_table - pattern = "%#{query}%" - matched_by_emails_user_ids = email_table.project(email_table[:user_id]).where(email_table[:email].matches(pattern)) + matched_by_emails_user_ids = email_table + .project(email_table[:user_id]) + .where(Email.fuzzy_arel_match(:email, query)) where( - table[:name].matches(pattern) - .or(table[:email].matches(pattern)) - .or(table[:username].matches(pattern)) - .or(table[:id].in(matched_by_emails_user_ids)) + fuzzy_arel_match(:name, query) + .or(fuzzy_arel_match(:email, query)) + .or(fuzzy_arel_match(:username, query)) + .or(arel_table[:id].in(matched_by_emails_user_ids)) ) end diff --git a/changelogs/unreleased/dm-search-pattern.yml b/changelogs/unreleased/dm-search-pattern.yml new file mode 100644 index 00000000000..1670d8c4b9a --- /dev/null +++ b/changelogs/unreleased/dm-search-pattern.yml @@ -0,0 +1,5 @@ +--- +title: Use fuzzy search with minimum length of 3 characters where appropriate +merge_request: +author: +type: performance diff --git a/lib/gitlab/sql/pattern.rb b/lib/gitlab/sql/pattern.rb index 7c2d1d8f887..5f0c98cb5a4 100644 --- a/lib/gitlab/sql/pattern.rb +++ b/lib/gitlab/sql/pattern.rb @@ -4,9 +4,15 @@ module Gitlab extend ActiveSupport::Concern MIN_CHARS_FOR_PARTIAL_MATCHING = 3 - REGEX_QUOTED_WORD = /(?<=^| )"[^"]+"(?= |$)/ + REGEX_QUOTED_WORD = /(?<=\A| )"[^"]+"(?= |\z)/ class_methods do + def fuzzy_search(query, columns) + matches = columns.map { |col| fuzzy_arel_match(col, query) }.compact.reduce(:or) + + where(matches) + end + def to_pattern(query) if partial_matching?(query) "%#{sanitize_sql_like(query)}%" @@ -19,12 +25,19 @@ module Gitlab query.length >= MIN_CHARS_FOR_PARTIAL_MATCHING end - def to_fuzzy_arel(column, query) - words = select_fuzzy_words(query) + def fuzzy_arel_match(column, query) + query = query.squish + return nil unless query.present? - matches = words.map { |word| arel_table[column].matches(to_pattern(word)) } + words = select_fuzzy_words(query) - matches.reduce { |result, match| result.and(match) } + if words.any? + words.map { |word| arel_table[column].matches(to_pattern(word)) }.reduce(:and) + else + # No words of at least 3 chars, but we can search for an exact + # case insensitive match with the query as a whole + arel_table[column].matches(sanitize_sql_like(query)) + end end def select_fuzzy_words(query) @@ -32,7 +45,7 @@ module Gitlab query = quoted_words.reduce(query) { |q, quoted_word| q.sub(quoted_word, '') } - words = query.split(/\s+/) + words = query.split quoted_words.map! { |quoted_word| quoted_word[1..-2] } diff --git a/spec/lib/gitlab/sql/pattern_spec.rb b/spec/lib/gitlab/sql/pattern_spec.rb index 48d56628ed5..ef51e3cc8df 100644 --- a/spec/lib/gitlab/sql/pattern_spec.rb +++ b/spec/lib/gitlab/sql/pattern_spec.rb @@ -137,22 +137,22 @@ describe Gitlab::SQL::Pattern do end end - describe '.to_fuzzy_arel' do - subject(:to_fuzzy_arel) { Issue.to_fuzzy_arel(:title, query) } + describe '.fuzzy_arel_match' do + subject(:fuzzy_arel_match) { Issue.fuzzy_arel_match(:title, query) } context 'with a word equal to 3 chars' do let(:query) { 'foo' } it 'returns a single ILIKE condition' do - expect(to_fuzzy_arel.to_sql).to match(/title.*I?LIKE '\%foo\%'/) + expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE '\%foo\%'/) end end context 'with a word shorter than 3 chars' do let(:query) { 'fo' } - it 'returns nil' do - expect(to_fuzzy_arel).to be_nil + it 'returns a single equality condition' do + expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo'/) end end @@ -160,7 +160,23 @@ describe Gitlab::SQL::Pattern do let(:query) { 'foo baz' } it 'returns a joining LIKE condition using a AND' do - expect(to_fuzzy_arel.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%'/) + expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%'/) + end + end + + context 'with two words both shorter than 3 chars' do + let(:query) { 'fo ba' } + + it 'returns a single ILIKE condition' do + expect(fuzzy_arel_match.to_sql).to match(/title.*I?LIKE 'fo ba'/) + end + end + + context 'with two words, one shorter 3 chars' do + let(:query) { 'foo ba' } + + it 'returns a single ILIKE condition using the longer word' do + expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%'/) end end @@ -168,7 +184,7 @@ describe Gitlab::SQL::Pattern do let(:query) { 'foo "really bar" baz' } it 'returns a joining LIKE condition using a AND' do - expect(to_fuzzy_arel.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%' AND .*title.*I?LIKE '\%really bar\%'/) + expect(fuzzy_arel_match.to_sql).to match(/title.+I?LIKE '\%foo\%' AND .*title.*I?LIKE '\%baz\%' AND .*title.*I?LIKE '\%really bar\%'/) end end end diff --git a/spec/models/ci/runner_spec.rb b/spec/models/ci/runner_spec.rb index 584dfe9a5c1..a93e7e233a8 100644 --- a/spec/models/ci/runner_spec.rb +++ b/spec/models/ci/runner_spec.rb @@ -473,7 +473,7 @@ describe Ci::Runner do end describe '.search' do - let(:runner) { create(:ci_runner, token: '123abc') } + let(:runner) { create(:ci_runner, token: '123abc', description: 'test runner') } it 'returns runners with a matching token' do expect(described_class.search(runner.token)).to eq([runner]) diff --git a/spec/models/concerns/issuable_spec.rb b/spec/models/concerns/issuable_spec.rb index 765b2729918..a53b59c4e08 100644 --- a/spec/models/concerns/issuable_spec.rb +++ b/spec/models/concerns/issuable_spec.rb @@ -67,6 +67,7 @@ describe Issuable do describe ".search" do let!(:searchable_issue) { create(:issue, title: "Searchable awesome issue") } + let!(:searchable_issue2) { create(:issue, title: 'Aw') } it 'returns issues with a matching title' do expect(issuable_class.search(searchable_issue.title)) @@ -86,8 +87,8 @@ describe Issuable do expect(issuable_class.search('searchable issue')).to eq([searchable_issue]) end - it 'returns all issues with a query shorter than 3 chars' do - expect(issuable_class.search('zz')).to eq(issuable_class.all) + it 'returns issues with a matching title for a query shorter than 3 chars' do + expect(issuable_class.search(searchable_issue2.title.downcase)).to eq([searchable_issue2]) end end @@ -95,6 +96,7 @@ describe Issuable do let!(:searchable_issue) do create(:issue, title: "Searchable awesome issue", description: 'Many cute kittens') end + let!(:searchable_issue2) { create(:issue, title: "Aw", description: "Cu") } it 'returns issues with a matching title' do expect(issuable_class.full_search(searchable_issue.title)) @@ -133,8 +135,8 @@ describe Issuable do expect(issuable_class.full_search('many kittens')).to eq([searchable_issue]) end - it 'returns all issues with a query shorter than 3 chars' do - expect(issuable_class.search('zz')).to eq(issuable_class.all) + it 'returns issues with a matching description for a query shorter than 3 chars' do + expect(issuable_class.full_search(searchable_issue2.description.downcase)).to eq([searchable_issue2]) end end diff --git a/spec/models/snippet_spec.rb b/spec/models/snippet_spec.rb index de3ca300ae3..e09d89d235d 100644 --- a/spec/models/snippet_spec.rb +++ b/spec/models/snippet_spec.rb @@ -88,7 +88,7 @@ describe Snippet do end describe '.search' do - let(:snippet) { create(:snippet) } + let(:snippet) { create(:snippet, title: 'test snippet') } it 'returns snippets with a matching title' do expect(described_class.search(snippet.title)).to eq([snippet]) |