summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarin Jankovski <marin@gitlab.com>2015-09-21 08:26:12 +0000
committerMarin Jankovski <marin@gitlab.com>2015-09-21 08:26:12 +0000
commitd26b4fa5388121b441545b3dc2b61651784c6802 (patch)
treebf8484ca30bd355e518659f94288e3a58541f2bf
parente3041b90dc4b02a1bd9b68e820d19f4a58488940 (diff)
parentcaebd5022dd255ebde42bfed2b854e88842dd873 (diff)
downloadgitlab-ci-d26b4fa5388121b441545b3dc2b61651784c6802.tar.gz
Merge branch 'export-script' into 'master'
Final export script See merge request !251
-rw-r--r--README.md11
-rw-r--r--db/migrate/20150914102123_migrate_ci_tables.rb19
-rw-r--r--db/migrate/20150921081619_rename_taggings_idx.rb8
-rw-r--r--db/schema.rb96
-rw-r--r--lib/backup/builds.rb25
-rw-r--r--lib/backup/database.rb98
-rw-r--r--lib/backup/manager.rb16
-rw-r--r--lib/support/mysql-postgresql-converter/LICENSE9
-rw-r--r--lib/support/mysql-postgresql-converter/README.md54
-rw-r--r--lib/support/mysql-postgresql-converter/db_converter.py255
-rwxr-xr-xlib/support/mysql-postgresql-converter/splice_drop_indexes37
-rw-r--r--lib/tasks/backup.rake7
12 files changed, 546 insertions, 89 deletions
diff --git a/README.md b/README.md
index e99c968..d15ded3 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,15 @@
[![Dependency Status](https://gemnasium.com/gitlabhq/gitlab-ci.png)](https://gemnasium.com/gitlabhq/gitlab-ci)
[![Coverage Status](https://coveralls.io/repos/gitlabhq/gitlab-ci/badge.png?branch=master)](https://coveralls.io/r/gitlabhq/gitlab-ci)
+## GitLab CI 8.0
+
+GitLab CI is now integrated in GitLab. The last 'stand-alone' version of GitLab
+CI was version 7.14. The sole purpose of GitLab CI 8.0 is to help you migrate
+data from your existing (pre-8.0) CI server into GitLab 8.0.
+
+The migration procedure is documented [in
+GitLab](https://gitlab.com/gitlab-org/gitlab-ce/blob/8-0-stable/doc/migrate_ci_to_ce/README.md).
+
### Information
-Please see the [GitLab CI page on the website](https://about.gitlab.com/gitlab-ci/) for all information. \ No newline at end of file
+Please see the [GitLab CI page on the website](https://about.gitlab.com/gitlab-ci/) for all information.
diff --git a/db/migrate/20150914102123_migrate_ci_tables.rb b/db/migrate/20150914102123_migrate_ci_tables.rb
new file mode 100644
index 0000000..639f8f5
--- /dev/null
+++ b/db/migrate/20150914102123_migrate_ci_tables.rb
@@ -0,0 +1,19 @@
+class MigrateCiTables < ActiveRecord::Migration
+ def up
+ rename_table :application_settings, :ci_application_settings
+ rename_table :builds, :ci_builds
+ rename_table :commits, :ci_commits
+ rename_table :events, :ci_events
+ rename_table :jobs, :ci_jobs
+ rename_table :projects, :ci_projects
+ rename_table :runner_projects, :ci_runner_projects
+ rename_table :runners, :ci_runners
+ rename_table :services, :ci_services
+ rename_table :tags, :ci_tags
+ rename_table :taggings, :ci_taggings
+ rename_table :trigger_requests, :ci_trigger_requests
+ rename_table :triggers, :ci_triggers
+ rename_table :variables, :ci_variables
+ rename_table :web_hooks, :ci_web_hooks
+ end
+end
diff --git a/db/migrate/20150921081619_rename_taggings_idx.rb b/db/migrate/20150921081619_rename_taggings_idx.rb
new file mode 100644
index 0000000..ceeb6da
--- /dev/null
+++ b/db/migrate/20150921081619_rename_taggings_idx.rb
@@ -0,0 +1,8 @@
+class RenameTaggingsIdx < ActiveRecord::Migration
+ def up
+ remove_index :ci_taggings, name: 'taggings_idx'
+ add_index :ci_taggings,
+ [:tag_id, :taggable_id, :taggable_type, :context, :tagger_id, :tagger_type],
+ unique: true, name: 'ci_taggings_idx'
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index db684af..9d50519 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,19 +11,19 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema.define(version: 20150824202238) do
+ActiveRecord::Schema.define(version: 20150921081619) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
- create_table "application_settings", force: true do |t|
+ create_table "ci_application_settings", force: true do |t|
t.boolean "all_broken_builds"
t.boolean "add_pusher"
t.datetime "created_at"
t.datetime "updated_at"
end
- create_table "builds", force: true do |t|
+ create_table "ci_builds", force: true do |t|
t.integer "project_id"
t.string "status"
t.datetime "finished_at"
@@ -44,12 +44,12 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.integer "trigger_request_id"
end
- add_index "builds", ["commit_id"], name: "index_builds_on_commit_id", using: :btree
- add_index "builds", ["project_id", "commit_id"], name: "index_builds_on_project_id_and_commit_id", using: :btree
- add_index "builds", ["project_id"], name: "index_builds_on_project_id", using: :btree
- add_index "builds", ["runner_id"], name: "index_builds_on_runner_id", using: :btree
+ add_index "ci_builds", ["commit_id"], name: "index_ci_builds_on_commit_id", using: :btree
+ add_index "ci_builds", ["project_id", "commit_id"], name: "index_ci_builds_on_project_id_and_commit_id", using: :btree
+ add_index "ci_builds", ["project_id"], name: "index_ci_builds_on_project_id", using: :btree
+ add_index "ci_builds", ["runner_id"], name: "index_ci_builds_on_runner_id", using: :btree
- create_table "commits", force: true do |t|
+ create_table "ci_commits", force: true do |t|
t.integer "project_id"
t.string "ref"
t.string "sha"
@@ -62,13 +62,13 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "committed_at"
end
- add_index "commits", ["project_id", "committed_at", "id"], name: "index_commits_on_project_id_and_committed_at_and_id", using: :btree
- add_index "commits", ["project_id", "committed_at"], name: "index_commits_on_project_id_and_committed_at", using: :btree
- add_index "commits", ["project_id", "sha"], name: "index_commits_on_project_id_and_sha", using: :btree
- add_index "commits", ["project_id"], name: "index_commits_on_project_id", using: :btree
- add_index "commits", ["sha"], name: "index_commits_on_sha", using: :btree
+ add_index "ci_commits", ["project_id", "committed_at", "id"], name: "index_ci_commits_on_project_id_and_committed_at_and_id", using: :btree
+ add_index "ci_commits", ["project_id", "committed_at"], name: "index_ci_commits_on_project_id_and_committed_at", using: :btree
+ add_index "ci_commits", ["project_id", "sha"], name: "index_ci_commits_on_project_id_and_sha", using: :btree
+ add_index "ci_commits", ["project_id"], name: "index_ci_commits_on_project_id", using: :btree
+ add_index "ci_commits", ["sha"], name: "index_ci_commits_on_sha", using: :btree
- create_table "events", force: true do |t|
+ create_table "ci_events", force: true do |t|
t.integer "project_id"
t.integer "user_id"
t.integer "is_admin"
@@ -77,11 +77,11 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "updated_at"
end
- add_index "events", ["created_at"], name: "index_events_on_created_at", using: :btree
- add_index "events", ["is_admin"], name: "index_events_on_is_admin", using: :btree
- add_index "events", ["project_id"], name: "index_events_on_project_id", using: :btree
+ add_index "ci_events", ["created_at"], name: "index_ci_events_on_created_at", using: :btree
+ add_index "ci_events", ["is_admin"], name: "index_ci_events_on_is_admin", using: :btree
+ add_index "ci_events", ["project_id"], name: "index_ci_events_on_project_id", using: :btree
- create_table "jobs", force: true do |t|
+ create_table "ci_jobs", force: true do |t|
t.integer "project_id", null: false
t.text "commands"
t.boolean "active", default: true, null: false
@@ -95,10 +95,10 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "deleted_at"
end
- add_index "jobs", ["deleted_at"], name: "index_jobs_on_deleted_at", using: :btree
- add_index "jobs", ["project_id"], name: "index_jobs_on_project_id", using: :btree
+ add_index "ci_jobs", ["deleted_at"], name: "index_ci_jobs_on_deleted_at", using: :btree
+ add_index "ci_jobs", ["project_id"], name: "index_ci_jobs_on_project_id", using: :btree
- create_table "projects", force: true do |t|
+ create_table "ci_projects", force: true do |t|
t.string "name", null: false
t.integer "timeout", default: 3600, null: false
t.datetime "created_at"
@@ -121,17 +121,17 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.text "generated_yaml_config"
end
- create_table "runner_projects", force: true do |t|
+ create_table "ci_runner_projects", force: true do |t|
t.integer "runner_id", null: false
t.integer "project_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
end
- add_index "runner_projects", ["project_id"], name: "index_runner_projects_on_project_id", using: :btree
- add_index "runner_projects", ["runner_id"], name: "index_runner_projects_on_runner_id", using: :btree
+ add_index "ci_runner_projects", ["project_id"], name: "index_ci_runner_projects_on_project_id", using: :btree
+ add_index "ci_runner_projects", ["runner_id"], name: "index_ci_runner_projects_on_runner_id", using: :btree
- create_table "runners", force: true do |t|
+ create_table "ci_runners", force: true do |t|
t.string "token"
t.datetime "created_at"
t.datetime "updated_at"
@@ -146,7 +146,7 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.string "architecture"
end
- create_table "services", force: true do |t|
+ create_table "ci_services", force: true do |t|
t.string "type"
t.string "title"
t.integer "project_id", null: false
@@ -156,19 +156,9 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.text "properties"
end
- add_index "services", ["project_id"], name: "index_services_on_project_id", using: :btree
+ add_index "ci_services", ["project_id"], name: "index_ci_services_on_project_id", using: :btree
- create_table "sessions", force: true do |t|
- t.string "session_id", null: false
- t.text "data"
- t.datetime "created_at"
- t.datetime "updated_at"
- end
-
- add_index "sessions", ["session_id"], name: "index_sessions_on_session_id", using: :btree
- add_index "sessions", ["updated_at"], name: "index_sessions_on_updated_at", using: :btree
-
- create_table "taggings", force: true do |t|
+ create_table "ci_taggings", force: true do |t|
t.integer "tag_id"
t.integer "taggable_id"
t.string "taggable_type"
@@ -178,17 +168,17 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "created_at"
end
- add_index "taggings", ["tag_id", "taggable_id", "taggable_type", "context", "tagger_id", "tagger_type"], name: "taggings_idx", unique: true, using: :btree
- add_index "taggings", ["taggable_id", "taggable_type", "context"], name: "index_taggings_on_taggable_id_and_taggable_type_and_context", using: :btree
+ add_index "ci_taggings", ["tag_id", "taggable_id", "taggable_type", "context", "tagger_id", "tagger_type"], name: "ci_taggings_idx", unique: true, using: :btree
+ add_index "ci_taggings", ["taggable_id", "taggable_type", "context"], name: "index_ci_taggings_on_taggable_id_and_taggable_type_and_context", using: :btree
- create_table "tags", force: true do |t|
+ create_table "ci_tags", force: true do |t|
t.string "name"
t.integer "taggings_count", default: 0
end
- add_index "tags", ["name"], name: "index_tags_on_name", unique: true, using: :btree
+ add_index "ci_tags", ["name"], name: "index_ci_tags_on_name", unique: true, using: :btree
- create_table "trigger_requests", force: true do |t|
+ create_table "ci_trigger_requests", force: true do |t|
t.integer "trigger_id", null: false
t.text "variables"
t.datetime "created_at"
@@ -196,7 +186,7 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.integer "commit_id"
end
- create_table "triggers", force: true do |t|
+ create_table "ci_triggers", force: true do |t|
t.string "token"
t.integer "project_id", null: false
t.datetime "deleted_at"
@@ -204,9 +194,9 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "updated_at"
end
- add_index "triggers", ["deleted_at"], name: "index_triggers_on_deleted_at", using: :btree
+ add_index "ci_triggers", ["deleted_at"], name: "index_ci_triggers_on_deleted_at", using: :btree
- create_table "variables", force: true do |t|
+ create_table "ci_variables", force: true do |t|
t.integer "project_id", null: false
t.string "key"
t.text "value"
@@ -215,13 +205,23 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.string "encrypted_value_iv"
end
- add_index "variables", ["project_id"], name: "index_variables_on_project_id", using: :btree
+ add_index "ci_variables", ["project_id"], name: "index_ci_variables_on_project_id", using: :btree
- create_table "web_hooks", force: true do |t|
+ create_table "ci_web_hooks", force: true do |t|
t.string "url", null: false
t.integer "project_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
end
+ create_table "sessions", force: true do |t|
+ t.string "session_id", null: false
+ t.text "data"
+ t.datetime "created_at"
+ t.datetime "updated_at"
+ end
+
+ add_index "sessions", ["session_id"], name: "index_sessions_on_session_id", using: :btree
+ add_index "sessions", ["updated_at"], name: "index_sessions_on_updated_at", using: :btree
+
end
diff --git a/lib/backup/builds.rb b/lib/backup/builds.rb
index 71e9704..9746d24 100644
--- a/lib/backup/builds.rb
+++ b/lib/backup/builds.rb
@@ -1,23 +1,38 @@
module Backup
class Builds
- attr_reader :app_builds_dir, :backup_builds_dir, :backup_dir
+ attr_reader :app_builds_dir, :backup_builds_tarball, :backup_dir
def initialize
@app_builds_dir = File.realpath(Rails.root.join('builds'))
@backup_dir = GitlabCi.config.backup.path
- @backup_builds_dir = File.join(GitlabCi.config.backup.path, 'builds')
+ @backup_builds_tarball = File.join(GitlabCi.config.backup.path, 'builds/builds.tar.gz')
end
# Copy builds from builds directory to backup/builds
def dump
- FileUtils.mkdir_p(backup_builds_dir)
- FileUtils.cp_r(app_builds_dir, backup_dir)
+ FileUtils.mkdir_p(File.dirname(backup_builds_tarball))
+ FileUtils.rm_f(backup_builds_tarball)
+
+ # Use 'tar -czf -' instead of 'tar -cz' because on some systems the
+ # default behavior of tar is to talk to a tape device instead of
+ # stdin/stdout.
+ system(
+ *%W(tar -C #{app_builds_dir} -czf - -- .),
+ out: [backup_builds_tarball, 'w', 0600]
+ )
end
def restore
backup_existing_builds_dir
+ Dir.mkdir(app_builds_dir, 0700)
- FileUtils.cp_r(backup_builds_dir, app_builds_dir)
+ # Use 'tar -xzf -' instead of 'tar -xz' because on some systems the
+ # default behavior of tar is to talk to a tape device instead of
+ # stdin/stdout.
+ system(
+ *%W(tar -C #{app_builds_dir} -xzf - -- .),
+ in: backup_builds_tarball
+ )
end
def backup_existing_builds_dir
diff --git a/lib/backup/database.rb b/lib/backup/database.rb
index 3ef5f44..93403d6 100644
--- a/lib/backup/database.rb
+++ b/lib/backup/database.rb
@@ -1,7 +1,15 @@
require 'yaml'
+require 'open3'
module Backup
class Database
+ # These are the final CI tables (final prior to integration in GitLab)
+ TABLES = %w{
+ ci_application_settings ci_builds ci_commits ci_events ci_jobs ci_projects
+ ci_runner_projects ci_runners ci_services ci_tags ci_taggings ci_trigger_requests
+ ci_triggers ci_variables ci_web_hooks
+ }
+
attr_reader :config, :db_dir
def initialize
@@ -10,34 +18,86 @@ module Backup
FileUtils.mkdir_p(@db_dir) unless Dir.exists?(@db_dir)
end
- def dump
- success = case config["adapter"]
+ def dump(mysql_to_postgresql=false)
+ FileUtils.rm_f(db_file_name)
+ compress_rd, compress_wr = IO.pipe
+ compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600])
+ compress_rd.close
+
+ dump_pid = case config["adapter"]
when /^mysql/ then
$progress.print "Dumping MySQL database #{config['database']} ... "
- system('mysqldump', *mysql_args, config['database'], out: db_file_name)
+ args = mysql_args
+ args << '--compatible=postgresql' if mysql_to_postgresql
+ spawn('mysqldump', *args, config['database'], *TABLES, out: compress_wr)
when "postgresql" then
$progress.print "Dumping PostgreSQL database #{config['database']} ... "
pg_env
- system('pg_dump', config['database'], out: db_file_name)
+ spawn('pg_dump', '--clean', *TABLES.map { |t| "--table=#{t}" }, config['database'], out: compress_wr)
end
+ compress_wr.close
+
+ success = [compress_pid, dump_pid].all? { |pid| Process.waitpid(pid); $?.success? }
+
report_success(success)
abort 'Backup failed' unless success
+ convert_to_postgresql if mysql_to_postgresql
+ end
+
+ def convert_to_postgresql
+ mysql_dump_gz = db_file_name + '.mysql'
+ psql_dump_gz = db_file_name + '.psql'
+ drop_indexes_sql = File.join(db_dir, 'drop_indexes.sql')
+
+ File.rename(db_file_name, mysql_dump_gz)
+
+ $progress.print "Converting MySQL database dump to Postgres ... "
+ statuses = Open3.pipeline(
+ %W(gzip -cd #{mysql_dump_gz}),
+ %W(python lib/support/mysql-postgresql-converter/db_converter.py - - #{drop_indexes_sql}),
+ %W(gzip -1 -c),
+ out: [psql_dump_gz, 'w', 0600]
+ )
+
+ if !statuses.compact.all?(&:success?)
+ abort "mysql-to-postgresql-converter failed"
+ end
+ $progress.puts '[DONE]'.green
+ FileUtils.rm_f(mysql_dump_gz) # save disk space during conversion
+
+ $progress.print "Splicing in 'DROP INDEX' statements ... "
+ statuses = Open3.pipeline(
+ %W(lib/support/mysql-postgresql-converter/splice_drop_indexes #{psql_dump_gz} #{drop_indexes_sql}),
+ %W(gzip -1 -c),
+ out: [db_file_name, 'w', 0600]
+ )
+ if !statuses.compact.all?(&:success?)
+ abort "Failed to splice in 'DROP INDEXES' statements"
+ end
+
+ $progress.puts '[DONE]'.green
+ ensure
+ FileUtils.rm_f([mysql_dump_gz, psql_dump_gz, drop_indexes_sql])
end
def restore
- success = case config["adapter"]
+ decompress_rd, decompress_wr = IO.pipe
+ decompress_pid = spawn(*%W(gzip -cd), out: decompress_wr, in: db_file_name)
+ decompress_wr.close
+
+ restore_pid = case config["adapter"]
when /^mysql/ then
$progress.print "Restoring MySQL database #{config['database']} ... "
- system('mysql', *mysql_args, config['database'], in: db_file_name)
+ spawn('mysql', *mysql_args, config['database'], in: decompress_rd)
when "postgresql" then
$progress.print "Restoring PostgreSQL database #{config['database']} ... "
- # Drop all tables because PostgreSQL DB dumps do not contain DROP TABLE
- # statements like MySQL.
- drop_all_tables
- drop_all_postgres_sequences
pg_env
- system('psql', config['database'], '-f', db_file_name)
+ spawn('psql', config['database'], in: decompress_rd)
end
+ decompress_rd.close
+
+ success = [decompress_pid, restore_pid].all? { |pid| Process.waitpid(pid); $?.success? }
+
report_success(success)
abort 'Restore failed' unless success
end
@@ -45,7 +105,7 @@ module Backup
protected
def db_file_name
- File.join(db_dir, 'database.sql')
+ File.join(db_dir, 'database.sql.gz')
end
def mysql_args
@@ -74,19 +134,5 @@ module Backup
$progress.puts '[FAILED]'.red
end
end
-
- def drop_all_tables
- connection = ActiveRecord::Base.connection
- connection.tables.each do |table|
- connection.drop_table(table)
- end
- end
-
- def drop_all_postgres_sequences
- connection = ActiveRecord::Base.connection
- connection.execute("SELECT c.relname FROM pg_class c WHERE c.relkind = 'S';").each do |sequence|
- connection.execute("DROP SEQUENCE #{sequence['relname']}")
- end
- end
end
end
diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb
index 43fb362..8ad3ea6 100644
--- a/lib/backup/manager.rb
+++ b/lib/backup/manager.rb
@@ -7,7 +7,7 @@ module Backup
s[:backup_created_at] = Time.now
s[:gitlab_version] = GitlabCi::VERSION
s[:tar_version] = tar_version
- tar_file = "#{s[:backup_created_at].to_i}_gitlab_ci_backup.tar.gz"
+ tar_file = "#{s[:backup_created_at].to_i}_gitlab_ci_backup.tar"
Dir.chdir(GitlabCi.config.backup.path) do
File.open("#{GitlabCi.config.backup.path}/backup_information.yml",
@@ -20,7 +20,7 @@ module Backup
# create archive
$progress.print "Creating backup archive: #{tar_file} ... "
orig_umask = File.umask(0077)
- if Kernel.system('tar', '-czf', tar_file, *backup_contents)
+ if Kernel.system('tar', '-cf', tar_file, *backup_contents)
$progress.puts "done".green
else
puts "creating archive #{tar_file} failed".red
@@ -78,11 +78,11 @@ module Backup
removed = 0
Dir.chdir(GitlabCi.config.backup.path) do
- file_list = Dir.glob('*_gitlab_ci_backup.tar.gz')
- file_list.map! { |f| $1.to_i if f =~ /(\d+)_gitlab_ci_backup.tar.gz/ }
+ file_list = Dir.glob('*_gitlab_ci_backup.tar')
+ file_list.map! { |f| $1.to_i if f =~ /(\d+)_gitlab_ci_backup.tar/ }
file_list.sort.each do |timestamp|
if Time.at(timestamp) < (Time.now - keep_time)
- if Kernel.system(*%W(rm #{timestamp}_gitlab_ci_backup.tar.gz))
+ if Kernel.system(*%W(rm #{timestamp}_gitlab_ci_backup.tar))
removed += 1
end
end
@@ -99,7 +99,7 @@ module Backup
Dir.chdir(GitlabCi.config.backup.path)
# check for existing backups in the backup dir
- file_list = Dir.glob("*_gitlab_ci_backup.tar.gz").each.map { |f| f.split(/_/).first.to_i }
+ file_list = Dir.glob("*_gitlab_ci_backup.tar").each.map { |f| f.split(/_/).first.to_i }
puts "no backups found" if file_list.count == 0
if file_list.count > 1 && ENV["BACKUP"].nil?
@@ -108,7 +108,7 @@ module Backup
exit 1
end
- tar_file = ENV["BACKUP"].nil? ? File.join("#{file_list.first}_gitlab_ci_backup.tar.gz") : File.join(ENV["BACKUP"] + "_gitlab_ci_backup.tar.gz")
+ tar_file = ENV["BACKUP"].nil? ? File.join("#{file_list.first}_gitlab_ci_backup.tar") : File.join(ENV["BACKUP"] + "_gitlab_ci_backup.tar")
unless File.exists?(tar_file)
puts "The specified backup doesn't exist!"
@@ -117,7 +117,7 @@ module Backup
$progress.print "Unpacking backup ... "
- unless Kernel.system(*%W(tar -xzf #{tar_file}))
+ unless Kernel.system(*%W(tar -xf #{tar_file}))
puts "unpacking backup failed".red
exit 1
else
diff --git a/lib/support/mysql-postgresql-converter/LICENSE b/lib/support/mysql-postgresql-converter/LICENSE
new file mode 100644
index 0000000..e6710c5
--- /dev/null
+++ b/lib/support/mysql-postgresql-converter/LICENSE
@@ -0,0 +1,9 @@
+Copyright (c) 2012 Lanyrd Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lib/support/mysql-postgresql-converter/README.md b/lib/support/mysql-postgresql-converter/README.md
new file mode 100644
index 0000000..f0e97cd
--- /dev/null
+++ b/lib/support/mysql-postgresql-converter/README.md
@@ -0,0 +1,54 @@
+MySQL to PostgreSQL Converter
+=============================
+
+Lanyrd's MySQL to PostgreSQL conversion script. Use with care.
+
+This script was designed for our specific database and column requirements -
+notably, it doubles the lengths of VARCHARs due to a unicode size problem we
+had, places indexes on all foreign keys, and presumes you're using Django
+for column typing purposes.
+
+GitLab-specific changes
+-----------------------
+
+The `gitlab` branch of this fork contains the following changes made for
+GitLab.
+
+- Guard against replacing '0000-00-00 00:00:00' inside SQL text fields.
+- Replace all MySQL zero-byte string literals `\0`. This is safe as of GitLab
+ 6.8 because the GitLab database schema contains no binary columns.
+- Never set 'NOT NULL' constraints on datetimes.
+- Drop sequences before creating them.
+- Preserve default values of boolean (originally `tinyint(1)`) columns.
+- Import all indexes.
+- Import index names.
+- Drop tables before creating.
+- Drop indexes before creating.
+
+How to use
+----------
+
+First, dump your MySQL database in PostgreSQL-compatible format
+
+ mysqldump --compatible=postgresql --default-character-set=utf8 \
+ -r databasename.mysql -u root gitlabhq_production -p
+
+Then, convert it using the db_converter.py script.
+
+ python db_converter.py databasename.mysql - drop_indexes.sql | gzip -c > databasename.unfinished.psql.gz
+
+It'll print progress to the terminal
+
+Now we have a DB dump that can be imported but the dump will be slow due
+to existing indexes.
+
+ ./splice_drop_indexes databasename.unfinished.psql.gz drop_indexes.sql > databasename.psql
+
+Next, load your new dump into a fresh PostgreSQL database using:
+
+`psql -f databasename.psql -d gitlabhq_production`
+
+More information
+----------------
+
+You can learn more about the move which this powered at http://lanyrd.com/blog/2012/lanyrds-big-move/ and some technical details of it at http://www.aeracode.org/2012/11/13/one-change-not-enough/.
diff --git a/lib/support/mysql-postgresql-converter/db_converter.py b/lib/support/mysql-postgresql-converter/db_converter.py
new file mode 100644
index 0000000..38a0572
--- /dev/null
+++ b/lib/support/mysql-postgresql-converter/db_converter.py
@@ -0,0 +1,255 @@
+#!/usr/bin/env python
+
+"""
+Fixes a MySQL dump made with the right format so it can be directly
+imported to a new PostgreSQL database.
+
+Dump using:
+mysqldump --compatible=postgresql --default-character-set=utf8 -r databasename.mysql -u root databasename
+"""
+
+import re
+import sys
+import os
+import time
+import subprocess
+
+
+def parse(input_filename, output_filename, drop_index_filename):
+ "Feed it a file, and it'll output a fixed one"
+
+ # State storage
+ if input_filename == "-":
+ num_lines = -1
+ else:
+ num_lines = int(subprocess.check_output(["wc", "-l", input_filename]).strip().split()[0])
+ tables = {}
+ current_table = None
+ creation_lines = []
+ enum_types = []
+ foreign_key_lines = []
+ index_lines = []
+ drop_index_lines = []
+ sequence_lines = []
+ cast_lines = []
+ num_inserts = 0
+ started = time.time()
+
+ # Open output file and write header. Logging file handle will be stdout
+ # unless we're writing output to stdout, in which case NO PROGRESS FOR YOU.
+ if output_filename == "-":
+ output = sys.stdout
+ logging = open(os.devnull, "w")
+ else:
+ output = open(output_filename, "w")
+ logging = sys.stdout
+
+ drop_index = open(drop_index_filename, "w")
+
+ if input_filename == "-":
+ input_fh = sys.stdin
+ else:
+ input_fh = open(input_filename)
+
+
+ output.write("-- Converted by db_converter\n")
+ output.write("START TRANSACTION;\n")
+ output.write("SET standard_conforming_strings=off;\n")
+ output.write("SET escape_string_warning=off;\n")
+ output.write("SET CONSTRAINTS ALL DEFERRED;\n\n")
+
+ for i, line in enumerate(input_fh):
+ time_taken = time.time() - started
+ percentage_done = (i+1) / float(num_lines)
+ secs_left = (time_taken / percentage_done) - time_taken
+ logging.write("\rLine %i (of %s: %.2f%%) [%s tables] [%s inserts] [ETA: %i min %i sec]" % (
+ i + 1,
+ num_lines,
+ ((i+1)/float(num_lines))*100,
+ len(tables),
+ num_inserts,
+ secs_left // 60,
+ secs_left % 60,
+ ))
+ logging.flush()
+ line = line.decode("utf8").strip().replace(r"\\", "WUBWUBREALSLASHWUB").replace(r"\0", "").replace(r"\'", "''").replace("WUBWUBREALSLASHWUB", r"\\")
+ # Ignore comment lines
+ if line.startswith("--") or line.startswith("/*") or line.startswith("LOCK TABLES") or line.startswith("DROP TABLE") or line.startswith("UNLOCK TABLES") or not line:
+ continue
+
+ # Outside of anything handling
+ if current_table is None:
+ # Start of a table creation statement?
+ if line.startswith("CREATE TABLE"):
+ current_table = line.split('"')[1]
+ tables[current_table] = {"columns": []}
+ creation_lines = []
+ # Inserting data into a table?
+ elif line.startswith("INSERT INTO"):
+ output.write(re.sub(r"([^'])'0000-00-00 00:00:00'", r"\1NULL", line.encode("utf8")) + "\n")
+ num_inserts += 1
+ # ???
+ else:
+ print "\n ! Unknown line in main body: %s" % line
+
+ # Inside-create-statement handling
+ else:
+ # Is it a column?
+ if line.startswith('"'):
+ useless, name, definition = line.strip(",").split('"',2)
+ try:
+ type, extra = definition.strip().split(" ", 1)
+
+ # This must be a tricky enum
+ if ')' in extra:
+ type, extra = definition.strip().split(")")
+
+ except ValueError:
+ type = definition.strip()
+ extra = ""
+ extra = re.sub("CHARACTER SET [\w\d]+\s*", "", extra.replace("unsigned", ""))
+ extra = re.sub("COLLATE [\w\d]+\s*", "", extra.replace("unsigned", ""))
+
+ # See if it needs type conversion
+ final_type = None
+ final_default = None
+ set_sequence = None
+ if type == "tinyint(1)":
+ type = "int4"
+ set_sequence = True
+ final_type = "boolean"
+
+ if "DEFAULT '0'" in extra:
+ final_default = "FALSE"
+ elif "DEFAULT '1'" in extra:
+ final_default = "TRUE"
+
+ elif type.startswith("int("):
+ type = "integer"
+ set_sequence = True
+ elif type.startswith("bigint("):
+ type = "bigint"
+ set_sequence = True
+ elif type == "longtext":
+ type = "text"
+ elif type == "mediumtext":
+ type = "text"
+ elif type == "tinytext":
+ type = "text"
+ elif type.startswith("varchar("):
+ size = int(type.split("(")[1].rstrip(")"))
+ type = "varchar(%s)" % (size * 2)
+ elif type.startswith("smallint("):
+ type = "int2"
+ set_sequence = True
+ elif type == "datetime":
+ type = "timestamp with time zone"
+ extra = extra.replace("NOT NULL", "")
+ elif type == "double":
+ type = "double precision"
+ elif type == "blob":
+ type = "bytea"
+ elif type.startswith("enum(") or type.startswith("set("):
+
+ types_str = type.split("(")[1].rstrip(")").rstrip('"')
+ types_arr = [type_str.strip('\'') for type_str in types_str.split(",")]
+
+ # Considered using values to make a name, but its dodgy
+ # enum_name = '_'.join(types_arr)
+ enum_name = "{0}_{1}".format(current_table, name)
+
+ if enum_name not in enum_types:
+ output.write("CREATE TYPE {0} AS ENUM ({1}); \n".format(enum_name, types_str));
+ enum_types.append(enum_name)
+
+ type = enum_name
+
+ if final_type:
+ cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" DROP DEFAULT" % (current_table, name))
+ cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" TYPE %s USING CAST(\"%s\" as %s)" % (current_table, name, final_type, name, final_type))
+ if final_default:
+ cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" SET DEFAULT %s" % (current_table, name, final_default))
+ # ID fields need sequences [if they are integers?]
+ if name == "id" and set_sequence is True:
+ sequence_lines.append("DROP SEQUENCE IF EXISTS %s_id_seq" % (current_table))
+ sequence_lines.append("CREATE SEQUENCE %s_id_seq" % (current_table))
+ sequence_lines.append("SELECT setval('%s_id_seq', max(id)) FROM %s" % (current_table, current_table))
+ sequence_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"id\" SET DEFAULT nextval('%s_id_seq')" % (current_table, current_table))
+ # Record it
+ creation_lines.append('"%s" %s %s' % (name, type, extra))
+ tables[current_table]['columns'].append((name, type, extra))
+ # Is it a constraint or something?
+ elif line.startswith("PRIMARY KEY"):
+ creation_lines.append(line.rstrip(","))
+ elif line.startswith("CONSTRAINT"):
+ foreign_key_lines.append("ALTER TABLE \"%s\" ADD CONSTRAINT %s DEFERRABLE INITIALLY DEFERRED" % (current_table, line.split("CONSTRAINT")[1].strip().rstrip(",")))
+ foreign_key_lines.append("CREATE INDEX ON \"%s\" %s" % (current_table, line.split("FOREIGN KEY")[1].split("REFERENCES")[0].strip().rstrip(",")))
+ elif line.startswith("UNIQUE KEY \""):
+ index_name = line.split('"')[1].split('"')[0]
+ index_columns = line.split("(")[1].split(")")[0]
+ index_lines.append("CREATE UNIQUE INDEX \"%s\" ON %s (%s)" % (index_name, current_table, index_columns))
+ drop_index_lines.append("DROP INDEX IF EXISTS \"%s\"" % index_name)
+ elif line.startswith("UNIQUE KEY"):
+ index_columns = line.split("(")[1].split(")")[0]
+ index_lines.append("CREATE UNIQUE INDEX ON %s (%s)" % (current_table, index_columns))
+ elif line.startswith("KEY \""):
+ index_name = line.split('"')[1].split('"')[0]
+ index_columns = line.split("(")[1].split(")")[0]
+ index_lines.append("CREATE INDEX \"%s\" ON %s (%s)" % (index_name, current_table, index_columns))
+ drop_index_lines.append("DROP INDEX IF EXISTS \"%s\"" % index_name)
+ elif line.startswith("KEY"):
+ index_columns = line.split("(")[1].split(")")[0]
+ index_lines.append("CREATE INDEX ON %s (%s)" % (current_table, index_columns))
+ elif line.startswith("FULLTEXT KEY"):
+ fulltext_keys = " || ' ' || ".join( line.split('(')[-1].split(')')[0].replace('"', '').split(',') )
+ index_lines.append("CREATE INDEX ON %s USING gin(to_tsvector('english', %s))" % (current_table, fulltext_keys))
+ # Is it the end of the table?
+ elif line == ");":
+ output.write("DROP TABLE IF EXISTS \"%s\";\n" % current_table)
+ output.write("CREATE TABLE \"%s\" (\n" % current_table)
+ for i, line in enumerate(creation_lines):
+ output.write(" %s%s\n" % (line, "," if i != (len(creation_lines) - 1) else ""))
+ output.write(');\n\n')
+ current_table = None
+ # ???
+ else:
+ print "\n ! Unknown line inside table creation: %s" % line
+
+
+ # Finish file
+ output.write("\n-- Post-data save --\n")
+ output.write("COMMIT;\n")
+ output.write("START TRANSACTION;\n")
+
+ # Write typecasts out
+ output.write("\n-- Typecasts --\n")
+ for line in cast_lines:
+ output.write("%s;\n" % line)
+
+ # Write FK constraints out
+ output.write("\n-- Foreign keys --\n")
+ for line in foreign_key_lines:
+ output.write("%s;\n" % line)
+
+ # Write sequences out
+ output.write("\n-- Sequences --\n")
+ for line in sequence_lines:
+ output.write("%s;\n" % line)
+
+ drop_index.write("-- Drop indexes --\n")
+ for line in drop_index_lines:
+ drop_index.write("%s;\n" % line)
+
+ # Write indexes out
+ output.write("\n-- Indexes --\n")
+ for line in index_lines:
+ output.write("%s;\n" % line)
+
+ # Finish file
+ output.write("\n")
+ output.write("COMMIT;\n")
+ print ""
+
+
+if __name__ == "__main__":
+ parse(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/lib/support/mysql-postgresql-converter/splice_drop_indexes b/lib/support/mysql-postgresql-converter/splice_drop_indexes
new file mode 100755
index 0000000..6621ab0
--- /dev/null
+++ b/lib/support/mysql-postgresql-converter/splice_drop_indexes
@@ -0,0 +1,37 @@
+#!/bin/sh
+# This script reorders database dumps generated by db_converter.py for
+# efficient consumption by Postgres.
+
+fail() {
+ echo "$@" 1>&2
+ exit 1
+}
+
+db_gz=$1
+drop_indexes_sql=$2
+
+if [ -z "$db_gz" ] || [ -z "$drop_indexes_sql" ] ; then
+ fail "Usage: $0 database.sql.gz drop_indexes.sql"
+fi
+
+# Capture all text up to the first occurrence of 'SET CONSTRAINTS'
+preamble=$(zcat "$db_gz" | sed '/SET CONSTRAINTS/q')
+if [ -z "$preamble" ] ; then
+ fail "Could not read preamble"
+fi
+
+drop_indexes=$(cat "$drop_indexes_sql")
+if [ -z "$drop_indexes" ] ; then
+ fail "Could not read DROP INDEXES file"
+fi
+
+# Print preamble and drop indexes
+cat <<EOF
+${preamble}
+
+${drop_indexes}
+EOF
+
+# Print the rest of database.sql.gz. I don't understand this awk script but it
+# prints all lines after the first match of 'SET CONSTRAINTS'.
+zcat "$db_gz" | awk 'f; /SET CONSTRAINTS/ { f = 1 }'
diff --git a/lib/tasks/backup.rake b/lib/tasks/backup.rake
index df20c40..67c2dc4 100644
--- a/lib/tasks/backup.rake
+++ b/lib/tasks/backup.rake
@@ -3,9 +3,14 @@ namespace :backup do
desc "GITLAB | Create a backup of the GitLab CI database"
task create: :environment do
configure_cron_mode
+ mysql_to_postgresql = (ENV['MYSQL_TO_POSTGRESQL'] == '1')
+
+ $progress.puts "Applying final database migrations ... ".blue
+ Rake::Task['db:migrate'].invoke
+ $progress.puts "done".green
$progress.puts "Dumping database ... ".blue
- Backup::Database.new.dump
+ Backup::Database.new.dump(mysql_to_postgresql)
$progress.puts "done".green
$progress.puts "Dumping builds ... ".blue