summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2018-07-18 16:34:04 +0200
committerYorick Peterse <yorickpeterse@gmail.com>2018-07-18 16:34:04 +0200
commit42e1080d54cf32919a5592bbdeef1857afc90783 (patch)
treebb312c77c341d9959b67ddf65325c60fcefc3924
parent4736e0519b0d8c66ab35fd636f5f81089822ba7a (diff)
downloadgitlab-ce-42e1080d54cf32919a5592bbdeef1857afc90783.tar.gz
WIP parallel DB migrationsparallel-db-migrations
-rw-r--r--app/models/concerns/each_batch.rb4
-rw-r--r--parallel_migrate.rb144
2 files changed, 148 insertions, 0 deletions
diff --git a/app/models/concerns/each_batch.rb b/app/models/concerns/each_batch.rb
index 6ddbb8da1a9..6baa9edc263 100644
--- a/app/models/concerns/each_batch.rb
+++ b/app/models/concerns/each_batch.rb
@@ -38,6 +38,10 @@ module EachBatch
# of - The number of rows to retrieve per batch.
# column - The column to use for ordering the batches.
def each_batch(of: 1000, column: primary_key)
+ unless block_given?
+ return to_enum(__method__, of: of, column: column).lazy
+ end
+
unless column
raise ArgumentError,
'the column: argument must be set to a column name to use for ordering rows'
diff --git a/parallel_migrate.rb b/parallel_migrate.rb
new file mode 100644
index 00000000000..8c4c115bb8e
--- /dev/null
+++ b/parallel_migrate.rb
@@ -0,0 +1,144 @@
+# frozen_string_literal: true
+# rubocop: disable all
+
+class PostgresSetting < ActiveRecord::Base
+ self.table_name = 'pg_settings'
+
+ def self.autovacuum_interval
+ select('setting::bigint AS interval')
+ .where(name: 'autovacuum_naptime')
+ .take
+ .interval
+ end
+end
+
+class PostgresTableStatistic < ActiveRecord::Base
+ self.table_name = 'pg_stat_all_tables'
+
+ # This threshold is based on what GitLab.com can usually replicate in a matter
+ # of seconds.
+ MAX_DEAD_TUPLES = 100_000
+
+ alias_attribute :dead_tuples, :n_dead_tup
+ alias_attribute :live_tuples, :n_live_tup
+
+ def self.for_table(table)
+ find_by(relname: table)
+ end
+
+ def too_many_dead_tuples?
+ # If there are no live tuples (e.g. the statistics are out of date), the
+ # below calculation would return true, even if there are only a few dead
+ # tuples.
+ return false if live_tuples.zero?
+
+ # For small tables we want to limit the number of dead tuples to 1% of the
+ # total number of tuples.
+ #
+ # For large tables this can result in a very large number (millions for
+ # example), so we enforce an upper limit on the number of dead tuples.
+ return true if dead_tuples >= MAX_DEAD_TUPLES
+
+ (dead_tuples.to_f / live_tuples.to_f) >= 0.01
+ end
+end
+
+class Table
+ def initialize(name)
+ @name = name
+ end
+
+ def too_many_dead_tuples?
+ # Table statistics are only refreshed when running ANALYZE. To ensure this
+ # method doesn't return the wrong value, we first run ANALYZE before
+ # checking the statistics.
+ analyze
+
+ # We force the use of a transaction here so the query always goes to the
+ # primary, even when using the EE DB load balancer.
+ stats = PostgresTableStatistic.transaction do
+ PostgresTableStatistic.for_table(@name)
+ end
+
+ stats.too_many_dead_tuples?
+ end
+
+ def analyze
+ ActiveRecord::Base.connection.execute("ANALYZE #{quoted_table_name}")
+ end
+
+ def vacuum_analyze
+ ActiveRecord::Base.connection.execute("VACUUM ANALYZE #{quoted_table_name}")
+ end
+
+ def quoted_table_name
+ ActiveRecord::Base.connection.quote_table_name(@name)
+ end
+end
+
+class ReplicationSlot < ActiveRecord::Base
+ self.table_name = 'pg_replication_slots'
+
+ LAG_FUNCTION =
+ if Gitlab::Database.postgresql? && Gitlab::Database.version.to_f < 10.0
+ 'pg_xlog_location_diff(pg_current_xlog_insert_location(), restart_lsn)::bigint'
+ else
+ 'pg_wal_lsn_diff(pg_current_wal_insert_lsn(), restart_lsn)::bigint'
+ end
+
+ MAX_LAG = 100.megabytes
+
+ def self.lag_too_great?
+ sizes = transaction { pluck(LAG_FUNCTION) }
+ too_great = sizes.count { |size| size >= MAX_LAG }
+
+ # If too many replicas are falling behind too much, the availability of a
+ # GitLab instance might suffer. To prevent this from happening we require at
+ # least 1 replica to have data recent enough.
+ (sizes.length - too_great) >= 1
+ end
+end
+
+Event.send(:include, EachBatch)
+
+def parallel_migrate(query, batch_size: 1_000, concurrency: 8)
+ old_config = ActiveRecord::Base.configurations[Rails.env]
+ new_config = old_config.merge('pool' => concurrency)
+
+ ActiveRecord::Base.establish_connection(new_config)
+
+ begin
+ query.each_batch(of: batch_size).each_slice(concurrency) do |slices|
+ threads = slices.map do |slice|
+ Thread.new do
+ yield slice.first
+ end
+ end
+
+ threads.each(&:join)
+
+ table = Table.new(query.table_name)
+ start = Gitlab::Metrics::System.monotonic_time
+
+ while ReplicationSlot.lag_too_great? || table.too_many_dead_tuples?
+ sleep(5)
+
+ # If vacuuming hasn't kicked in yet we'll manually vacuum the table,
+ # ensuring we don't wait in this loop for too long.
+ #
+ # We use `nap time * 2` to make sure we don't start vacuuming right away
+ # when autovacuuming is waking up.
+ if (Gitlab::Metrics::System.monotonic_time - start) >= (PostgresSetting.autovacuum_interval * 2)
+ puts 'MANUALLY VACUUMING'
+ table.vacuum_analyze
+ end
+ end
+ end
+ ensure
+ ActiveRecord::Base.establish_connection(old_config)
+ end
+end
+
+parallel_migrate(Event, concurrency: 16) do |batch|
+ batch.update_all('updated_at = now()')
+end