From d3734fbd89c069d35856b440f12109af8a7ef9c9 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 6 Oct 2015 14:43:19 +0200 Subject: Use tar for intermediate backup storage During the backup we create an intermediate copy of two directories: builds and uploads. Instead of creating many small files with 'cp -r', we now use tar (and fast gzip) to create single intermediate files. This saves on disk IO and disk space while creating a backup. --- lib/backup/builds.rb | 31 ++----------------------------- lib/backup/files.rb | 39 +++++++++++++++++++++++++++++++++++++++ lib/backup/manager.rb | 4 ++-- lib/backup/uploads.rb | 30 ++---------------------------- 4 files changed, 45 insertions(+), 59 deletions(-) create mode 100644 lib/backup/files.rb (limited to 'lib/backup') diff --git a/lib/backup/builds.rb b/lib/backup/builds.rb index 6f56f680bb9..d269f8e260c 100644 --- a/lib/backup/builds.rb +++ b/lib/backup/builds.rb @@ -1,34 +1,7 @@ module Backup - class Builds - attr_reader :app_builds_dir, :backup_builds_dir, :backup_dir - + class Builds < Files def initialize - @app_builds_dir = Settings.gitlab_ci.builds_path - @backup_dir = Gitlab.config.backup.path - @backup_builds_dir = File.join(Gitlab.config.backup.path, 'builds') - end - - # Copy builds from builds directory to backup/builds - def dump - FileUtils.rm_rf(backup_builds_dir) - # Ensure the parent dir of backup_builds_dir exists - FileUtils.mkdir_p(Gitlab.config.backup.path) - # Fail if somebody raced to create backup_builds_dir before us - FileUtils.mkdir(backup_builds_dir, mode: 0700) - FileUtils.cp_r(app_builds_dir, backup_dir) - end - - def restore - backup_existing_builds_dir - - FileUtils.cp_r(backup_builds_dir, app_builds_dir) - end - - def backup_existing_builds_dir - timestamped_builds_path = File.join(app_builds_dir, '..', "builds.#{Time.now.to_i}") - if File.exists?(app_builds_dir) - FileUtils.mv(app_builds_dir, File.expand_path(timestamped_builds_path)) - end + super(Settings.gitlab_ci.builds_path) 
end end end diff --git a/lib/backup/files.rb b/lib/backup/files.rb new file mode 100644 index 00000000000..d0a6e8f27be --- /dev/null +++ b/lib/backup/files.rb @@ -0,0 +1,39 @@ +require 'open3' + +module Backup + class Files + attr_reader :name, :app_files_dir, :backup_tarball, :backup_dir, :files_parent_dir + + def initialize(app_files_dir) + @app_files_dir = File.realpath(app_files_dir) + @name = File.basename(app_files_dir) + @files_parent_dir = File.realpath(File.join(@app_files_dir, '..')) + @backup_dir = Gitlab.config.backup.path + @backup_tarball = File.join(@backup_dir, name + '.tar.gz') + end + + # Copy files from public/files to backup/files + def dump + FileUtils.mkdir_p(Gitlab.config.backup.path) + run_pipeline!([%W(tar -C #{files_parent_dir} -cf - #{name}), %W(gzip -c -1)], out: [backup_tarball, 'w', 0600]) + end + + def restore + backup_existing_files_dir + + run_pipeline!([%W(gzip -cd), %W(tar -C #{files_parent_dir} -xf -)], in: backup_tarball) + end + + def backup_existing_files_dir + timestamped_files_path = File.join(files_parent_dir, "#{name}.#{Time.now.to_i}") + if File.exists?(app_files_dir) + FileUtils.mv(app_files_dir, File.expand_path(timestamped_files_path)) + end + end + + def run_pipeline!(cmd_list, options={}) + status_list = Open3.pipeline(*cmd_list, options) + abort 'Backup failed' unless status_list.compact.all?(&:success?) 
+ end + end +end diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index 5c42f25f4a2..f011fd03de0 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -150,11 +150,11 @@ module Backup private def backup_contents - folders_to_backup + ["backup_information.yml"] + folders_to_backup + ["uploads.tar.gz", "builds.tar.gz", "backup_information.yml"] end def folders_to_backup - folders = %w{repositories db uploads builds} + folders = %w{repositories db} if ENV["SKIP"] return folders.reject{ |folder| ENV["SKIP"].include?(folder) } diff --git a/lib/backup/uploads.rb b/lib/backup/uploads.rb index 1f9626644e6..7c0838cc8b7 100644 --- a/lib/backup/uploads.rb +++ b/lib/backup/uploads.rb @@ -1,34 +1,8 @@ module Backup - class Uploads - attr_reader :app_uploads_dir, :backup_uploads_dir, :backup_dir + class Uploads < Files def initialize - @app_uploads_dir = File.realpath(Rails.root.join('public', 'uploads')) - @backup_dir = Gitlab.config.backup.path - @backup_uploads_dir = File.join(Gitlab.config.backup.path, 'uploads') - end - - # Copy uploads from public/uploads to backup/uploads - def dump - FileUtils.rm_rf(backup_uploads_dir) - # Ensure the parent dir of backup_uploads_dir exists - FileUtils.mkdir_p(Gitlab.config.backup.path) - # Fail if somebody raced to create backup_uploads_dir before us - FileUtils.mkdir(backup_uploads_dir, mode: 0700) - FileUtils.cp_r(app_uploads_dir, backup_dir) - end - - def restore - backup_existing_uploads_dir - - FileUtils.cp_r(backup_uploads_dir, app_uploads_dir) - end - - def backup_existing_uploads_dir - timestamped_uploads_path = File.join(app_uploads_dir, '..', "uploads.#{Time.now.to_i}") - if File.exists?(app_uploads_dir) - FileUtils.mv(app_uploads_dir, File.expand_path(timestamped_uploads_path)) - end + super(Rails.root.join('public/uploads')) end end end -- cgit v1.2.1 From 90ddf140b9390647002771572d0375da0bb9dfa4 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 6 Oct 2015 15:06:15 +0200 Subject: Reduce disk IO 
during SQL backup By using light gzip compression we can save a lot of disk IO during the backup. --- lib/backup/database.rb | 53 +++++++++++++++++++++++--------------------------- lib/backup/manager.rb | 4 ++-- 2 files changed, 26 insertions(+), 31 deletions(-) (limited to 'lib/backup') diff --git a/lib/backup/database.rb b/lib/backup/database.rb index 959ac4b7868..4bdf6e1c628 100644 --- a/lib/backup/database.rb +++ b/lib/backup/database.rb @@ -2,26 +2,27 @@ require 'yaml' module Backup class Database - attr_reader :config, :db_dir + attr_reader :config, :db_file_name def initialize @config = YAML.load_file(File.join(Rails.root,'config','database.yml'))[Rails.env] - @db_dir = File.join(Gitlab.config.backup.path, 'db') + @db_file_name = File.join(Gitlab.config.backup.path, 'database.sql.gz') end def dump - FileUtils.rm_rf(@db_dir) - # Ensure the parent dir of @db_dir exists + FileUtils.rm_f(db_file_name) + compress_rd, compress_wr = IO.pipe + compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600]) + compress_rd.close + FileUtils.mkdir_p(Gitlab.config.backup.path) - # Fail if somebody raced to create @db_dir before us - FileUtils.mkdir(@db_dir, mode: 0700) - success = case config["adapter"] + dump_pid = case config["adapter"] when /^mysql/ then $progress.print "Dumping MySQL database #{config['database']} ... " # Workaround warnings from MySQL 5.6 about passwords on cmd line ENV['MYSQL_PWD'] = config["password"].to_s if config["password"] - system('mysqldump', *mysql_args, config['database'], out: db_file_name) + spawn('mysqldump', *mysql_args, config['database'], out: compress_wr) when "postgresql" then $progress.print "Dumping PostgreSQL database #{config['database']} ... 
" pg_env @@ -30,48 +31,42 @@ module Backup pgsql_args << "-n" pgsql_args << Gitlab.config.backup.pg_schema end - system('pg_dump', *pgsql_args, config['database'], out: db_file_name) + spawn('pg_dump', *pgsql_args, config['database'], out: compress_wr) end - report_success(success) - abort 'Backup failed' unless success + compress_wr.close + + success = [compress_pid, dump_pid].all? { |pid| Process.waitpid(pid); $?.success? } - $progress.print 'Compressing database ... ' - success = system('gzip', db_file_name) report_success(success) - abort 'Backup failed: compress error' unless success + abort 'Backup failed' unless success end def restore - $progress.print 'Decompressing database ... ' - success = system('gzip', '-d', db_file_name_gz) - report_success(success) - abort 'Restore failed: decompress error' unless success + decompress_rd, decompress_wr = IO.pipe + decompress_pid = spawn(*%W(gzip -cd), out: decompress_wr, in: db_file_name) + decompress_wr.close - success = case config["adapter"] + restore_pid = case config["adapter"] when /^mysql/ then $progress.print "Restoring MySQL database #{config['database']} ... " # Workaround warnings from MySQL 5.6 about passwords on cmd line ENV['MYSQL_PWD'] = config["password"].to_s if config["password"] - system('mysql', *mysql_args, config['database'], in: db_file_name) + spawn('mysql', *mysql_args, config['database'], in: decompress_rd) when "postgresql" then $progress.print "Restoring PostgreSQL database #{config['database']} ... " pg_env - system('psql', config['database'], '-f', db_file_name) + spawn('psql', config['database'], in: decompress_rd) end + decompress_rd.close + + success = [decompress_pid, restore_pid].all? { |pid| Process.waitpid(pid); $?.success? 
} + report_success(success) abort 'Restore failed' unless success end protected - def db_file_name - File.join(db_dir, 'database.sql') - end - - def db_file_name_gz - File.join(db_dir, 'database.sql.gz') - end - def mysql_args args = { 'host' => '--host', diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index f011fd03de0..53e79d4d1f7 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -150,11 +150,11 @@ module Backup private def backup_contents - folders_to_backup + ["uploads.tar.gz", "builds.tar.gz", "backup_information.yml"] + folders_to_backup + ["database.sql.gz", "uploads.tar.gz", "builds.tar.gz", "backup_information.yml"] end def folders_to_backup - folders = %w{repositories db} + folders = %w{repositories} if ENV["SKIP"] return folders.reject{ |folder| ENV["SKIP"].include?(folder) } -- cgit v1.2.1 From 7b71727c562b6f6337a180ae136be94bf0f6ed31 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 6 Oct 2015 15:10:13 +0200 Subject: Remove old "files" tarball explicitly --- lib/backup/files.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/backup') diff --git a/lib/backup/files.rb b/lib/backup/files.rb index d0a6e8f27be..1b08e3324d7 100644 --- a/lib/backup/files.rb +++ b/lib/backup/files.rb @@ -15,6 +15,7 @@ module Backup # Copy files from public/files to backup/files def dump FileUtils.mkdir_p(Gitlab.config.backup.path) + FileUtils.rm_f(backup_tarball) run_pipeline!([%W(tar -C #{files_parent_dir} -cf - #{name}), %W(gzip -c -1)], out: [backup_tarball, 'w', 0600]) end -- cgit v1.2.1 From e789644783fae55f1095ffcc38b32f810f549caa Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 6 Oct 2015 15:21:15 +0200 Subject: Keep old path: db/database.sql.gz Documentation elsewhere refers to this internal path, let's keep it. 
--- lib/backup/database.rb | 3 ++- lib/backup/manager.rb | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/backup') diff --git a/lib/backup/database.rb b/lib/backup/database.rb index 4bdf6e1c628..fe0434361e8 100644 --- a/lib/backup/database.rb +++ b/lib/backup/database.rb @@ -6,10 +6,11 @@ module Backup def initialize @config = YAML.load_file(File.join(Rails.root,'config','database.yml'))[Rails.env] - @db_file_name = File.join(Gitlab.config.backup.path, 'database.sql.gz') + @db_file_name = File.join(Gitlab.config.backup.path, 'db', 'database.sql.gz') end def dump + FileUtils.mkdir_p(File.dirname(db_file_name)) FileUtils.rm_f(db_file_name) compress_rd, compress_wr = IO.pipe compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600]) diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb index 53e79d4d1f7..f011fd03de0 100644 --- a/lib/backup/manager.rb +++ b/lib/backup/manager.rb @@ -150,11 +150,11 @@ module Backup private def backup_contents - folders_to_backup + ["database.sql.gz", "uploads.tar.gz", "builds.tar.gz", "backup_information.yml"] + folders_to_backup + ["uploads.tar.gz", "builds.tar.gz", "backup_information.yml"] end def folders_to_backup - folders = %w{repositories} + folders = %w{repositories db} if ENV["SKIP"] return folders.reject{ |folder| ENV["SKIP"].include?(folder) } -- cgit v1.2.1 From 7d58489fd908b2263f02e8919b1bd0b3fae1201d Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 6 Oct 2015 15:22:08 +0200 Subject: Remove unused variable --- lib/backup/files.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/backup') diff --git a/lib/backup/files.rb b/lib/backup/files.rb index 1b08e3324d7..5a210a0e464 100644 --- a/lib/backup/files.rb +++ b/lib/backup/files.rb @@ -2,14 +2,13 @@ require 'open3' module Backup class Files - attr_reader :name, :app_files_dir, :backup_tarball, :backup_dir, :files_parent_dir + attr_reader :name, :app_files_dir, :backup_tarball, 
:files_parent_dir def initialize(app_files_dir) @app_files_dir = File.realpath(app_files_dir) @name = File.basename(app_files_dir) @files_parent_dir = File.realpath(File.join(@app_files_dir, '..')) - @backup_dir = Gitlab.config.backup.path - @backup_tarball = File.join(@backup_dir, name + '.tar.gz') + @backup_tarball = File.join(Gitlab.config.backup.path, name + '.tar.gz') end # Copy files from public/files to backup/files -- cgit v1.2.1 From 901f5445785a754227d8b77ca535947ab8cbbfca Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 6 Oct 2015 15:38:21 +0200 Subject: Remove superfluous mkdir -p --- lib/backup/database.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/backup') diff --git a/lib/backup/database.rb b/lib/backup/database.rb index fe0434361e8..67b2a64bd10 100644 --- a/lib/backup/database.rb +++ b/lib/backup/database.rb @@ -16,8 +16,6 @@ module Backup compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600]) compress_rd.close - FileUtils.mkdir_p(Gitlab.config.backup.path) - dump_pid = case config["adapter"] when /^mysql/ then $progress.print "Dumping MySQL database #{config['database']} ... 
" -- cgit v1.2.1 From 58260a0327a953499a07e9cad8d9aaad2d25699b Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Fri, 16 Oct 2015 17:16:17 +0200 Subject: Do not rely on basename of builds, uploads --- lib/backup/builds.rb | 6 +++++- lib/backup/files.rb | 9 +++++---- lib/backup/uploads.rb | 6 +++++- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'lib/backup') diff --git a/lib/backup/builds.rb b/lib/backup/builds.rb index d269f8e260c..800f30c2144 100644 --- a/lib/backup/builds.rb +++ b/lib/backup/builds.rb @@ -1,7 +1,11 @@ module Backup class Builds < Files def initialize - super(Settings.gitlab_ci.builds_path) + super('builds', Settings.gitlab_ci.builds_path) + end + + def create_files_dir + Dir.mkdir(app_files_dir, 0700) end end end diff --git a/lib/backup/files.rb b/lib/backup/files.rb index 5a210a0e464..654b4d1c896 100644 --- a/lib/backup/files.rb +++ b/lib/backup/files.rb @@ -4,9 +4,9 @@ module Backup class Files attr_reader :name, :app_files_dir, :backup_tarball, :files_parent_dir - def initialize(app_files_dir) + def initialize(name, app_files_dir) + @name = name @app_files_dir = File.realpath(app_files_dir) - @name = File.basename(app_files_dir) @files_parent_dir = File.realpath(File.join(@app_files_dir, '..')) @backup_tarball = File.join(Gitlab.config.backup.path, name + '.tar.gz') end @@ -15,13 +15,14 @@ module Backup def dump FileUtils.mkdir_p(Gitlab.config.backup.path) FileUtils.rm_f(backup_tarball) - run_pipeline!([%W(tar -C #{files_parent_dir} -cf - #{name}), %W(gzip -c -1)], out: [backup_tarball, 'w', 0600]) + run_pipeline!([%W(tar -C #{app_files_dir} -cf - .), %W(gzip -c -1)], out: [backup_tarball, 'w', 0600]) end def restore backup_existing_files_dir + create_files_dir - run_pipeline!([%W(gzip -cd), %W(tar -C #{files_parent_dir} -xf -)], in: backup_tarball) + run_pipeline!([%W(gzip -cd), %W(tar -C #{app_files_dir} -xf -)], in: backup_tarball) end def backup_existing_files_dir diff --git a/lib/backup/uploads.rb b/lib/backup/uploads.rb 
index 7c0838cc8b7..0a0ec564ba4 100644 --- a/lib/backup/uploads.rb +++ b/lib/backup/uploads.rb @@ -2,7 +2,11 @@ module Backup class Uploads < Files def initialize - super(Rails.root.join('public/uploads')) + super('uploads', Rails.root.join('public/uploads')) + end + + def create_files_dir + Dir.mkdir(app_files_dir) end end end -- cgit v1.2.1