| author | Yorick Peterse <yorickpeterse@gmail.com> | 2018-07-18 16:34:04 +0200 |
| --- | --- | --- |
| committer | Yorick Peterse <yorickpeterse@gmail.com> | 2018-07-18 16:34:04 +0200 |
| commit | 42e1080d54cf32919a5592bbdeef1857afc90783 (patch) | |
| tree | bb312c77c341d9959b67ddf65325c60fcefc3924 | |
| parent | 4736e0519b0d8c66ab35fd636f5f81089822ba7a (diff) | |
| download | gitlab-ce-42e1080d54cf32919a5592bbdeef1857afc90783.tar.gz | |
WIP parallel DB migrations (parallel-db-migrations)
| -rw-r--r-- | app/models/concerns/each_batch.rb | 4 |
| -rw-r--r-- | parallel_migrate.rb | 144 |

2 files changed, 148 insertions, 0 deletions
```diff
diff --git a/app/models/concerns/each_batch.rb b/app/models/concerns/each_batch.rb
index 6ddbb8da1a9..6baa9edc263 100644
--- a/app/models/concerns/each_batch.rb
+++ b/app/models/concerns/each_batch.rb
@@ -38,6 +38,10 @@ module EachBatch
   # of - The number of rows to retrieve per batch.
   # column - The column to use for ordering the batches.
   def each_batch(of: 1000, column: primary_key)
+    unless block_given?
+      return to_enum(__method__, of: of, column: column).lazy
+    end
+
     unless column
       raise ArgumentError,
         'the column: argument must be set to a column name to use for ordering rows'
diff --git a/parallel_migrate.rb b/parallel_migrate.rb
new file mode 100644
index 00000000000..8c4c115bb8e
--- /dev/null
+++ b/parallel_migrate.rb
@@ -0,0 +1,144 @@
+# frozen_string_literal: true
+# rubocop: disable all
+
+class PostgresSetting < ActiveRecord::Base
+  self.table_name = 'pg_settings'
+
+  def self.autovacuum_interval
+    select('setting::bigint AS interval')
+      .where(name: 'autovacuum_naptime')
+      .take
+      .interval
+  end
+end
+
+class PostgresTableStatistic < ActiveRecord::Base
+  self.table_name = 'pg_stat_all_tables'
+
+  # This threshold is based on what GitLab.com can usually replicate in a matter
+  # of seconds.
+  MAX_DEAD_TUPLES = 100_000
+
+  alias_attribute :dead_tuples, :n_dead_tup
+  alias_attribute :live_tuples, :n_live_tup
+
+  def self.for_table(table)
+    find_by(relname: table)
+  end
+
+  def too_many_dead_tuples?
+    # If there are no live tuples (e.g. the statistics are out of date), the
+    # below calculation would return true, even if there are only a few dead
+    # tuples.
+    return false if live_tuples.zero?
+
+    # For small tables we want to limit the number of dead tuples to 1% of the
+    # total number of tuples.
+    #
+    # For large tables this can result in a very large number (millions for
+    # example), so we enforce an upper limit on the number of dead tuples.
+    return true if dead_tuples >= MAX_DEAD_TUPLES
+
+    (dead_tuples.to_f / live_tuples.to_f) >= 0.01
+  end
+end
+
+class Table
+  def initialize(name)
+    @name = name
+  end
+
+  def too_many_dead_tuples?
+    # Table statistics are only refreshed when running ANALYZE. To ensure this
+    # method doesn't return the wrong value, we first run ANALYZE before
+    # checking the statistics.
+    analyze
+
+    # We force the use of a transaction here so the query always goes to the
+    # primary, even when using the EE DB load balancer.
+    stats = PostgresTableStatistic.transaction do
+      PostgresTableStatistic.for_table(@name)
+    end
+
+    stats.too_many_dead_tuples?
+  end
+
+  def analyze
+    ActiveRecord::Base.connection.execute("ANALYZE #{quoted_table_name}")
+  end
+
+  def vacuum_analyze
+    ActiveRecord::Base.connection.execute("VACUUM ANALYZE #{quoted_table_name}")
+  end
+
+  def quoted_table_name
+    ActiveRecord::Base.connection.quote_table_name(@name)
+  end
+end
+
+class ReplicationSlot < ActiveRecord::Base
+  self.table_name = 'pg_replication_slots'
+
+  LAG_FUNCTION =
+    if Gitlab::Database.postgresql? && Gitlab::Database.version.to_f < 10.0
+      'pg_xlog_location_diff(pg_current_xlog_insert_location(), restart_lsn)::bigint'
+    else
+      'pg_wal_lsn_diff(pg_current_wal_insert_lsn(), restart_lsn)::bigint'
+    end
+
+  MAX_LAG = 100.megabytes
+
+  def self.lag_too_great?
+    sizes = transaction { pluck(LAG_FUNCTION) }
+    too_great = sizes.count { |size| size >= MAX_LAG }
+
+    # If too many replicas are falling behind too much, the availability of a
+    # GitLab instance might suffer. To prevent this from happening we require at
+    # least 1 replica to have data recent enough.
+    (sizes.length - too_great) < 1
+  end
+end
+
+Event.send(:include, EachBatch)
+
+def parallel_migrate(query, batch_size: 1_000, concurrency: 8)
+  old_config = ActiveRecord::Base.configurations[Rails.env]
+  new_config = old_config.merge('pool' => concurrency)
+
+  ActiveRecord::Base.establish_connection(new_config)
+
+  begin
+    query.each_batch(of: batch_size).each_slice(concurrency) do |slices|
+      threads = slices.map do |slice|
+        Thread.new do
+          yield slice.first
+        end
+      end
+
+      threads.each(&:join)
+
+      table = Table.new(query.table_name)
+      start = Gitlab::Metrics::System.monotonic_time
+
+      while ReplicationSlot.lag_too_great? || table.too_many_dead_tuples?
+        sleep(5)
+
+        # If vacuuming hasn't kicked in yet we'll manually vacuum the table,
+        # ensuring we don't wait in this loop for too long.
+        #
+        # We use `nap time * 2` to make sure we don't start vacuuming right away
+        # when autovacuuming is waking up.
+        if (Gitlab::Metrics::System.monotonic_time - start) >= (PostgresSetting.autovacuum_interval * 2)
+          puts 'MANUALLY VACUUMING'
+          table.vacuum_analyze
+        end
+      end
+    end
+  ensure
+    ActiveRecord::Base.establish_connection(old_config)
+  end
+end
+
+parallel_migrate(Event, concurrency: 16) do |batch|
+  batch.update_all('updated_at = now()')
+end
```
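For context on the `each_batch` change: returning `to_enum(__method__, ...).lazy` when no block is given is what lets `parallel_migrate` chain `each_slice(concurrency)` onto the batches without materialising every batch up front. A minimal sketch of the same block-or-enumerator pattern in plain Ruby (the `Pager` class and its data are made up for illustration):

```ruby
# Block-or-enumerator pattern, as added to EachBatch#each_batch above.
class Pager
  def initialize(rows)
    @rows = rows
  end

  # With a block: yields each batch. Without: returns a lazy enumerator
  # over the same batches, so callers can compose each_slice, take, etc.
  def each_batch(of: 2)
    return to_enum(__method__, of: of).lazy unless block_given?

    @rows.each_slice(of) { |batch| yield batch }
  end
end

pager = Pager.new((1..10).to_a)

# Classic block form.
pager.each_batch(of: 5) { |batch| p batch }

# Enumerator form: group batches in pairs, as parallel_migrate does with
# its concurrency setting.
pager.each_batch(of: 2).each_slice(2) { |pair| p pair }
```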
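The dead-tuple check combines a relative and an absolute threshold: up to 1% of the live tuples may be dead, but at 100,000 dead tuples the table counts as dirty regardless, so very large tables don't get an effectively unbounded allowance. The same decision logic as a standalone function (a restatement for illustration, not part of the patch):

```ruby
MAX_DEAD_TUPLES = 100_000

# Mirrors PostgresTableStatistic#too_many_dead_tuples? above.
def too_many_dead_tuples?(dead:, live:)
  # Stale statistics report zero live tuples; don't trust the ratio then.
  return false if live.zero?

  # Absolute cap for large tables.
  return true if dead >= MAX_DEAD_TUPLES

  # Relative 1% threshold for small tables.
  dead.to_f / live >= 0.01
end

too_many_dead_tuples?(dead: 50, live: 10_000)          # => false (0.5%)
too_many_dead_tuples?(dead: 150, live: 10_000)         # => true  (1.5%)
too_many_dead_tuples?(dead: 120_000, live: 50_000_000) # => true  (cap)
```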
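The replication check is similarly conservative: it only reports trouble once not a single replica is within 100 MB of WAL of the primary. Restated over a plain list of per-slot lag values (a standalone sketch; an instance with no replication slots at all would need an extra guard):

```ruby
MAX_LAG = 100 * 1024 * 1024 # 100 MB, i.e. 100.megabytes without ActiveSupport

# Mirrors ReplicationSlot.lag_too_great? above: lag only blocks the
# migration when zero replicas are within MAX_LAG bytes of the primary.
def lag_too_great?(lag_bytes)
  too_great = lag_bytes.count { |size| size >= MAX_LAG }

  (lag_bytes.length - too_great) < 1
end

lag_too_great?([10_000, 500_000_000])      # => false: one replica is current
lag_too_great?([200_000_000, 500_000_000]) # => true: every replica is behind
```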