author    Jacob Vosmaer <contact@jacobvosmaer.nl> 2015-09-18 16:33:38 +0200
committer Jacob Vosmaer <contact@jacobvosmaer.nl> 2015-09-18 16:57:33 +0200
commit    b9bbad5298e2d50bb424dad2f91a1ab75a36c687 (patch)
tree      f8671b2a21adb622d1f9e62f52e821c85b10d90b
parent    bf1e976d918bc343a0739e05c6f62c76f4035ba2 (diff)
download  gitlab-ci-b9bbad5298e2d50bb424dad2f91a1ab75a36c687.tar.gz
Integrate mysql-to-postgres conversion
-rw-r--r--  lib/backup/database.rb                                        43
-rw-r--r--  lib/support/mysql-postgresql-converter/README.md               9
-rw-r--r--  lib/support/mysql-postgresql-converter/db_converter.py        13
-rwxr-xr-x  lib/support/mysql-postgresql-converter/splice_drop_indexes    37
-rw-r--r--  lib/tasks/backup.rake                                          3
5 files changed, 89 insertions, 16 deletions
diff --git a/lib/backup/database.rb b/lib/backup/database.rb
index e63dcb8..3d09c8c 100644
--- a/lib/backup/database.rb
+++ b/lib/backup/database.rb
@@ -1,4 +1,5 @@
require 'yaml'
+require 'open3'
module Backup
class Database
@@ -17,7 +18,7 @@ module Backup
FileUtils.mkdir_p(@db_dir) unless Dir.exists?(@db_dir)
end
- def dump
+ def dump(mysql_to_postgresql=false)
FileUtils.rm_f(db_file_name)
compress_rd, compress_wr = IO.pipe
compress_pid = spawn(*%W(gzip -c), in: compress_rd, out: [db_file_name, 'w', 0600])
@@ -26,7 +27,9 @@ module Backup
dump_pid = case config["adapter"]
when /^mysql/ then
$progress.print "Dumping MySQL database #{config['database']} ... "
- spawn('mysqldump', *mysql_args, config['database'], *TABLES, out: compress_wr)
+ args = mysql_args
+ args << '--compatible=postgresql' if mysql_to_postgresql
+ spawn('mysqldump', *args, config['database'], *TABLES, out: compress_wr)
when "postgresql" then
$progress.print "Dumping PostgreSQL database #{config['database']} ... "
pg_env
@@ -38,6 +41,42 @@ module Backup
report_success(success)
abort 'Backup failed' unless success
+ convert_to_postgresql if mysql_to_postgresql
+ end
+
+ def convert_to_postgresql
+ mysql_dump_gz = db_file_name + '.mysql'
+ psql_dump_gz = db_file_name + '.psql'
+ drop_indexes_sql = File.join(db_dir, 'drop_indexes.sql')
+
+ File.rename(db_file_name, mysql_dump_gz)
+
+ $progress.print "Converting MySQL database dump to Postgres ... "
+ statuses = Open3.pipeline(
+ %W(gzip -cd #{mysql_dump_gz}),
+ %W(python lib/support/mysql-postgresql-converter/db_converter.py - - #{drop_indexes_sql}),
+ %W(gzip -c),
+ out: [psql_dump_gz, 'w', 0600]
+ )
+
+ if !statuses.compact.all?(&:success?)
+ abort "mysql-to-postgresql-converter failed"
+ end
+ $progress.puts '[DONE]'.green
+
+ $progress.print "Splicing in 'DROP INDEX' statements ... "
+ statuses = Open3.pipeline(
+ %W(lib/support/mysql-postgresql-converter/splice_drop_indexes #{psql_dump_gz} #{drop_indexes_sql}),
+ %W(gzip -c),
+ out: [db_file_name, 'w', 0600]
+ )
+ if !statuses.compact.all?(&:success?)
+ abort "Failed to splice in 'DROP INDEX' statements"
+ end
+
+ $progress.puts '[DONE]'.green
+ ensure
+ FileUtils.rm_f([mysql_dump_gz, psql_dump_gz, drop_indexes_sql])
end
def restore
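The Open3.pipeline calls above work like shell pipelines: each %W() word list
becomes one child process, stdout of each feeds stdin of the next, the out:
option redirects the last stdout to a file created with mode 0600, and the
return value is one Process::Status per command. The first pipeline
corresponds roughly to this shell sketch (the file names are placeholders
standing in for db_file_name and the derived temp files):

    gzip -cd database.sql.gz.mysql \
      | python lib/support/mysql-postgresql-converter/db_converter.py - - drop_indexes.sql \
      | gzip -c > database.sql.gz.psql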
diff --git a/lib/support/mysql-postgresql-converter/README.md b/lib/support/mysql-postgresql-converter/README.md
index 9940ee5..f0e97cd 100644
--- a/lib/support/mysql-postgresql-converter/README.md
+++ b/lib/support/mysql-postgresql-converter/README.md
@@ -35,17 +35,14 @@ First, dump your MySQL database in PostgreSQL-compatible format
Then, convert it using the db_converter.py script.
- python db_converter.py databasename.mysql databasename.psql
+ python db_converter.py databasename.mysql - drop_indexes.sql | gzip -c > databasename.unfinished.psql.gz
It'll print progress to the terminal.
Now we have a DB dump that can be imported but the import will be slow due
-to existing indexes. We use 'ed' to edit the DB dump file and move the
-'DROP INDEX' statements to the start of the import. Ed is not the fastest
-tool for this job if your DB dump is multiple gigabytes. (Patches to
-the converter are welcome!)
+to existing indexes.
- ed -s databasename.psql < move_drop_indexes.ed
+ ./splice_drop_indexes databasename.unfinished.psql.gz drop_indexes.sql > databasename.psql
Next, load your new dump into a fresh PostgreSQL database using:
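The hunk ends just before the load command itself. A hedged sketch of that
step, assuming a plain psql import with the database name as a placeholder:

    psql -f databasename.psql databasename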
diff --git a/lib/support/mysql-postgresql-converter/db_converter.py b/lib/support/mysql-postgresql-converter/db_converter.py
index 8a7f8a2..38a0572 100644
--- a/lib/support/mysql-postgresql-converter/db_converter.py
+++ b/lib/support/mysql-postgresql-converter/db_converter.py
@@ -15,7 +15,7 @@ import time
import subprocess
-def parse(input_filename, output_filename):
+def parse(input_filename, output_filename, drop_index_filename):
"Feed it a file, and it'll output a fixed one"
# State storage
@@ -44,6 +44,8 @@ def parse(input_filename, output_filename):
output = open(output_filename, "w")
logging = sys.stdout
+ drop_index = open(drop_index_filename, "w")
+
if input_filename == "-":
input_fh = sys.stdin
else:
@@ -234,12 +236,9 @@ def parse(input_filename, output_filename):
for line in sequence_lines:
output.write("%s;\n" % line)
- # This line is an anchor for move_drop_indexes.ed
- output.write("\n-- Drop indexes --\n")
+ drop_index.write("-- Drop indexes --\n")
for line in drop_index_lines:
- output.write("%s;\n" % line)
- # This line is an anchor for move_drop_indexes.ed
- output.write("-- END Drop indexes --\n")
+ drop_index.write("%s;\n" % line)
# Write indexes out
output.write("\n-- Indexes --\n")
@@ -253,4 +252,4 @@ def parse(input_filename, output_filename):
if __name__ == "__main__":
- parse(sys.argv[1], sys.argv[2])
+ parse(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/lib/support/mysql-postgresql-converter/splice_drop_indexes b/lib/support/mysql-postgresql-converter/splice_drop_indexes
new file mode 100755
index 0000000..a2b5de8
--- /dev/null
+++ b/lib/support/mysql-postgresql-converter/splice_drop_indexes
@@ -0,0 +1,37 @@
+#!/bin/sh
+# This script reorders database dumps generated by db_converter.py for
+# efficient consumption by Postgres.
+
+fail() {
+ echo "$@" 1>&2
+ exit 1
+}
+
+db_gz=$1
+drop_indexes_sql=$2
+
+if [ -z "$db_gz" ] || [ -z "$drop_indexes_sql" ] ; then
+ fail "Usage: $0 database.sql.gz drop_indexes.sql"
+fi
+
+# Capture all text up to and including the first occurrence of 'SET CONSTRAINTS'
+preamble=$(zcat "$db_gz" | sed '/SET CONSTRAINTS/q')
+if [ -z "$preamble" ] ; then
+ fail "Could not read preamble"
+fi
+
+drop_indexes=$(cat "$drop_indexes_sql")
+if [ -z "$drop_indexes" ] ; then
+ fail "Could not read DROP INDEXES file"
+fi
+
+# Print preamble and drop indexes
+cat <<EOF
+${preamble}
+
+${drop_indexes}
+EOF
+
+# Print the rest of database.sql.gz. The bare awk pattern 'f' prints a line
+# only once the flag is set, and the flag is set after the first line
+# matching 'SET CONSTRAINTS', so this emits everything after that match.
+zcat "$db_gz" | awk 'f; /SET CONSTRAINTS/ { f = 1 }'
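The awk one-liner at the end is the usual "print everything after the first
match" idiom: the bare pattern f prints a line only when the flag is already
set, and the block sets the flag once 'SET CONSTRAINTS' has been seen, so the
matching line itself is not reprinted. A quick illustration:

    printf 'a\nSET CONSTRAINTS;\nb\nc\n' | awk 'f; /SET CONSTRAINTS/ { f = 1 }'
    # prints:
    # b
    # c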
diff --git a/lib/tasks/backup.rake b/lib/tasks/backup.rake
index 4eaa311..67c2dc4 100644
--- a/lib/tasks/backup.rake
+++ b/lib/tasks/backup.rake
@@ -3,13 +3,14 @@ namespace :backup do
desc "GITLAB | Create a backup of the GitLab CI database"
task create: :environment do
configure_cron_mode
+ mysql_to_postgresql = (ENV['MYSQL_TO_POSTGRESQL'] == '1')
$progress.puts "Applying final database migrations ... ".blue
Rake::Task['db:migrate'].invoke
$progress.puts "done".green
$progress.puts "Dumping database ... ".blue
- Backup::Database.new.dump
+ Backup::Database.new.dump(mysql_to_postgresql)
$progress.puts "done".green
$progress.puts "Dumping builds ... ".blue
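With this change, a converting backup is opted into via the environment
variable checked above. A hedged usage sketch, assuming the standard
bundler-based rake invocation for GitLab CI:

    MYSQL_TO_POSTGRESQL=1 bundle exec rake backup:create RAILS_ENV=production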