From 49c081b9f38e99bbc11e7132d87773749b5b39d5 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 28 Oct 2015 14:43:27 +0100 Subject: Improve performance of User.find_by_any_email MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This query used to rely on a JOIN, effectively producing the following SQL: SELECT users.* FROM users LEFT OUTER JOIN emails ON emails.user_id = users.id WHERE (users.email = X OR emails.email = X) LIMIT 1; The use of a JOIN means having to scan over all Emails and users, join them together and then filter out the rows that don't match the criteria (though this step may be taken into account already when joining). In the new setup this query instead uses a sub-query, producing the following SQL: SELECT * FROM users WHERE id IN (select user_id FROM emails WHERE email = X) OR email = X LIMIT 1; This query has the benefit that it: 1. Doesn't have to JOIN any rows 2. Only has to operate on a relatively small set of rows from the "emails" table. Since most users will only have a handful of Emails associated (certainly not hundreds or even thousands) the size of the set returned by the sub-query is small enough that it should not become problematic. Performance of the old versus new version can be measured using the following benchmark: # Save this in ./bench.rb require 'benchmark/ips' email = 'yorick@gitlab.com' def User.find_by_any_email_old(email) user_table = arel_table email_table = Email.arel_table query = user_table. project(user_table[Arel.star]). join(email_table, Arel::Nodes::OuterJoin). on(user_table[:id].eq(email_table[:user_id])). where(user_table[:email].eq(email).or(email_table[:email].eq(email))) find_by_sql(query.to_sql).first end Benchmark.ips do |bench| bench.report 'original' do User.find_by_any_email_old(email) end bench.report 'optimized' do User.find_by_any_email(email) end bench.compare! end Running this locally using "bundle exec rails r bench.rb" produces the following output: Calculating ------------------------------------- original 1.000 i/100ms optimized 93.000 i/100ms ------------------------------------------------- original 11.103 (± 0.0%) i/s - 56.000 optimized 948.713 (± 5.3%) i/s - 4.743k Comparison: optimized: 948.7 i/s original: 11.1 i/s - 85.45x slower In other words, the new setup is 85x faster compared to the old setup, at least when running this benchmark locally. For GitLab.com these improvements result in User.find_by_any_email taking only ~170 ms to run, instead of around 800 ms. While this is "only" an improvement of about 4.5 times (instead of 85x) it's still significantly better than before. Fixes #3242 --- app/models/user.rb | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'app/models/user.rb') diff --git a/app/models/user.rb b/app/models/user.rb index c72beacbf0f..924cb543fab 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -235,21 +235,9 @@ class User < ActiveRecord::Base # Find a User by their primary email or any associated secondary email def find_by_any_email(email) - user_table = arel_table - email_table = Email.arel_table - - # Use ARel to build a query: - query = user_table. - # SELECT "users".* FROM "users" - project(user_table[Arel.star]). - # LEFT OUTER JOIN "emails" - join(email_table, Arel::Nodes::OuterJoin). - # ON "users"."id" = "emails"."user_id" - on(user_table[:id].eq(email_table[:user_id])). - # WHERE ("user"."email" = '' OR "emails"."email" = '') - where(user_table[:email].eq(email).or(email_table[:email].eq(email))) - - find_by_sql(query.to_sql).first + User.reorder(nil). + where('id IN (SELECT user_id FROM emails WHERE email = :email) OR email = :email', email: email). + take end def filter(filter_name) -- cgit v1.2.1 From 24c8f42278844cf48cdd9871b8285445379623f0 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Thu, 29 Oct 2015 12:05:47 +0100 Subject: Use a UNION for User.find_by_any_email This is significantly faster than using a sub-query, at least when run on the GitLab.com production database. The benchmarks are a lot slower now with these changes, most likely due to PostgreSQL choosing a different (and less efficient) plan based on the amount of data present in the test database. Thanks to @dlemstra for suggesting the use of a UNION. --- app/models/user.rb | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'app/models/user.rb') diff --git a/app/models/user.rb b/app/models/user.rb index 924cb543fab..35f5ab56798 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -235,9 +235,18 @@ class User < ActiveRecord::Base # Find a User by their primary email or any associated secondary email def find_by_any_email(email) - User.reorder(nil). - where('id IN (SELECT user_id FROM emails WHERE email = :email) OR email = :email', email: email). - take + # Arel doesn't allow for chaining operations on union nodes, thus we have + # to write this query by hand. See the following issue for more info: + # https://github.com/rails/arel/issues/98. + sql = '(SELECT * FROM users WHERE email = :email + UNION + SELECT users.* + FROM emails + INNER JOIN users ON users.id = emails.user_id + WHERE emails.email = :email) + LIMIT 1;' + + User.find_by_sql([sql, { email: email }]).first end def filter(filter_name) -- cgit v1.2.1 From bba46623c20de52942c85eb2aba3649e3f4ba296 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Thu, 29 Oct 2015 13:33:20 +0100 Subject: Fixed UNION syntax for MySQL MySQL doesn't support the previous syntax. --- app/models/user.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'app/models/user.rb') diff --git a/app/models/user.rb b/app/models/user.rb index 35f5ab56798..66db70080b7 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -238,9 +238,9 @@ class User < ActiveRecord::Base # Arel doesn't allow for chaining operations on union nodes, thus we have # to write this query by hand. See the following issue for more info: # https://github.com/rails/arel/issues/98. - sql = '(SELECT * FROM users WHERE email = :email + sql = '(SELECT * FROM users WHERE email = :email) UNION - SELECT users.* + (SELECT users.* FROM emails INNER JOIN users ON users.id = emails.user_id WHERE emails.email = :email) -- cgit v1.2.1 From a9df714764d6138bf162acd82b780ca82a21864b Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Thu, 29 Oct 2015 17:51:49 +0100 Subject: Use a subquery with IDs only for find_by_any_email This further improves performance of User.find_by_any_email and is roughly twice as fast as the previous UNION setup. Thanks again to @dlemstra for suggesting this. --- app/models/user.rb | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'app/models/user.rb') diff --git a/app/models/user.rb b/app/models/user.rb index 66db70080b7..67fef1c1e6a 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -235,15 +235,13 @@ class User < ActiveRecord::Base # Find a User by their primary email or any associated secondary email def find_by_any_email(email) - # Arel doesn't allow for chaining operations on union nodes, thus we have - # to write this query by hand. See the following issue for more info: - # https://github.com/rails/arel/issues/98. - sql = '(SELECT * FROM users WHERE email = :email) - UNION - (SELECT users.* - FROM emails - INNER JOIN users ON users.id = emails.user_id - WHERE emails.email = :email) + sql = 'SELECT * + FROM users + WHERE id IN ( + SELECT id FROM users WHERE email = :email + UNION + SELECT emails.user_id FROM emails WHERE email = :email + ) LIMIT 1;' User.find_by_sql([sql, { email: email }]).first -- cgit v1.2.1 From a237999f000526b3db5b0b5a72a665adcff29522 Mon Sep 17 00:00:00 2001 From: Dmitriy Zaporozhets Date: Fri, 13 Nov 2015 19:22:46 +0100 Subject: Annotate models Signed-off-by: Dmitriy Zaporozhets --- app/models/user.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'app/models/user.rb') diff --git a/app/models/user.rb b/app/models/user.rb index 67fef1c1e6a..9ffadcf4468 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -54,6 +54,7 @@ # public_email :string(255) default(""), not null # dashboard :integer default(0) # project_view :integer default(0) +# consumed_timestep :integer # layout :integer default(0) # -- cgit v1.2.1 From 03f5ff750b107b30a6d306aafb6699a9c9ecff0d Mon Sep 17 00:00:00 2001 From: Kamil Trzcinski Date: Mon, 16 Nov 2015 13:24:36 +0100 Subject: Show specific runners from projects where user is master or owner --- app/models/user.rb | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'app/models/user.rb') diff --git a/app/models/user.rb b/app/models/user.rb index 9ffadcf4468..61abea1f6ea 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -405,6 +405,15 @@ class User < ActiveRecord::Base end end + def master_or_owner_projects_id + @master_or_owner_projects_id ||= begin + scope = { access_level: [ Gitlab::Access::MASTER, Gitlab::Access::OWNER ] } + project_ids = personal_projects.pluck(:id) + project_ids.push(*groups_projects.where(members: scope).pluck(:id)) + project_ids.push(*projects.where(members: scope).pluck(:id).uniq) + end + end + # Projects user has access to def authorized_projects @authorized_projects ||= Project.where(id: authorized_projects_id) @@ -765,14 +774,10 @@ class User < ActiveRecord::Base !solo_owned_groups.present? end - def ci_authorized_projects - @ci_authorized_projects ||= Ci::Project.where(gitlab_id: authorized_projects_id) - end - def ci_authorized_runners @ci_authorized_runners ||= begin runner_ids = Ci::RunnerProject.joins(:project). - where(ci_projects: { gitlab_id: authorized_projects_id }).select(:runner_id) + where(ci_projects: { gitlab_id: master_or_owner_projects_id }).select(:runner_id) Ci::Runner.specific.where(id: runner_ids) end end -- cgit v1.2.1