From 7a8a892efdf59925a95cdf6504f7c74c31b87eeb Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Fri, 25 Sep 2015 17:12:41 +0200 Subject: Add "rake gitlab:list_repos" task --- doc/raketasks/list_repos.md | 30 ++++++++++++++++++++++++++++++ lib/tasks/gitlab/list_repos.rake | 16 ++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 doc/raketasks/list_repos.md create mode 100644 lib/tasks/gitlab/list_repos.rake diff --git a/doc/raketasks/list_repos.md b/doc/raketasks/list_repos.md new file mode 100644 index 00000000000..476428eb4f5 --- /dev/null +++ b/doc/raketasks/list_repos.md @@ -0,0 +1,30 @@ +# Listing repository directories + +You can print a list of all Git repositories on disk managed by +GitLab with the following command: + +``` +# Omnibus +sudo gitlab-rake gitlab:list_repos + +# Source +cd /home/git/gitlab +sudo -u git -H bundle exec rake gitlab:list_repos RAILS_ENV=production +``` + +If you only want to list projects with recent activity you can pass +a date with the 'SINCE' environment variable. The time you specify +is parsed by Ruby's [Time.parse +method](https://docs.ruby-lang.org/en/master/Time.html#method-c-parse). + +``` +# Omnibus +sudo gitlab-rake gitlab:list_repos SINCE='Sep 1 2015' + +# Source +cd /home/git/gitlab +sudo -u git -H bundle exec rake gitlab:list_repos RAILS_ENV=production SINCE='Sep 1 2015' +``` + +Note that the projects listed are NOT sorted by activity; they use +the default ordering of the GitLab Rails application. 
diff --git a/lib/tasks/gitlab/list_repos.rake b/lib/tasks/gitlab/list_repos.rake new file mode 100644 index 00000000000..1377e1ea910 --- /dev/null +++ b/lib/tasks/gitlab/list_repos.rake @@ -0,0 +1,16 @@ +namespace :gitlab do + task list_repos: :environment do + scope = Project + if ENV['SINCE'] + date = Time.parse(ENV['SINCE']) + warn "Listing repositories with activity since #{date}" + project_ids = Project.where(['last_activity_at > ?', date]).pluck(:id) + scope = scope.where(id: project_ids) + end + scope.find_each do |project| + base = File.join(Gitlab.config.gitlab_shell.repos_path, project.path_with_namespace) + puts base + '.git' + puts base + '.wiki.git' + end + end +end -- cgit v1.2.1 From 5bcd0efe3e0b1fef06147d87f843adac717d7c42 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Fri, 25 Sep 2015 18:31:54 +0200 Subject: Add parallel-rsync-repos script and start docs --- bin/parallel-rsync-repos | 26 ++++++++++ doc/operations/rsyncing_repositories.md | 87 +++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 bin/parallel-rsync-repos create mode 100644 doc/operations/rsyncing_repositories.md diff --git a/bin/parallel-rsync-repos b/bin/parallel-rsync-repos new file mode 100644 index 00000000000..b2429f743b5 --- /dev/null +++ b/bin/parallel-rsync-repos @@ -0,0 +1,26 @@ +#!/bin/sh +# this script should run as the 'git' user, not root, because of mkdir +# +# Example invocation: +# find /var/opt/gitlab/git-data/repositories -maxdepth 2 | \ +# parallel-rsync-repos /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories + +SRC=$1 +DEST=$2 + +if [ -z "$JOBS" ] ; then + JOBS=10 +fi + +if [ -z "$SRC" ] || [ -z "$DEST" ] ; then + echo "Usage: $0 SRC DEST" + exit 1 +fi + +if ! cd $SRC ; then + echo "cd $SRC failed" + exit 1 +fi + +sed "s|$SRC|./|" |\ + parallel -j$JOBS --progress "mkdir -p $DEST/{} && rsync --delete -a {}/. 
$DEST/{}/" diff --git a/doc/operations/rsyncing_repositories.md b/doc/operations/rsyncing_repositories.md new file mode 100644 index 00000000000..231e09f0462 --- /dev/null +++ b/doc/operations/rsyncing_repositories.md @@ -0,0 +1,87 @@ +# Moving repositories managed by GitLab + +Sometimes you need to move all repositories managed by GitLab to +another filesystem or another server. In this document we will look +at some of the ways you can copy all your repositories from +`/var/opt/gitlab/git-data/repositories` to `/mnt/gitlab/repositories`. + +We will look at three scenarios: the target directory is empty, the +target directory contains an outdated copy of the repositories, and +how to deal with thousands of repositories. + +**Each of the approaches we list can/will overwrite data in the +target directory `/mnt/gitlab/repositories`. Do not mix up the +source and the target.** + +## Target directory is empty: use a tar pipe + +If the target directory `/mnt/gitlab/repositories` is empty the +simplest thing to do is to use a tar pipe. + +``` +# As the git user +tar -C /var/opt/gitlab/git-data/repositories -cf - -- . |\ + tar -C /mnt/gitlab/repositories -xf - +``` + +If you want to see progress, replace `-xf` with `-xvf`. + +### Tar pipe to another server + +You can also use a tar pipe to copy data to another server. If your +'git' user has SSH access to the newserver as 'git@newserver', you +can pipe the data through SSH. + +``` +# As the git user +tar -C /var/opt/gitlab/git-data/repositories -cf - -- . |\ + ssh git@newserver tar -C /mnt/gitlab/repositories -xf - +``` + +If you want to compress the data before it goes over the network +(which will cost you CPU cycles) you can replace `ssh` with `ssh +-C`. + +## The target directory contains an outdated copy of the repositories: use rsync + +In this scenario it is better to use rsync. This utility is either +already installed on your system or easily installable via apt, yum +etc. 
+ +``` +# As the 'git' user +rsync -a --delete /var/opt/gitlab/git-data/repositories/. \ + /mnt/gitlab/repositories +``` + +The `/.` in the command above is very important, without it you can +easily get the wrong directory structure in the target directory. +If you want to see progress, replace `-a` with `-av`. + +### Single rsync to another server + +If the 'git' user on your source system has SSH access to the target +server you can send the repositories over the network with rsync. + +``` +# As the 'git' user +rsync -a --delete /var/opt/gitlab/git-data/repositories/. \ + git@newserver:/mnt/gitlab/repositories +``` + +## Thousands of Git repositories: use one rsync per repository + +Every time you start an rsync job it has to inspect all files in +the source directory, all files in the target directory, and then +decide what files to copy or not. If the source or target directory +has many contents this startup phase of rsync can become a burden +for your GitLab server. In cases like this you can make rsync's +life easier by dividing its work in smaller pieces, and sync one +repository at a time. + +In addition to rsync we will use [GNU +Parallel](http://www.gnu.org/software/parallel/). This utility is +not included in GitLab so you need to install it yourself with apt +or yum. Also note that the GitLab scripts we used below were added +in GitLab 8.???. 
+ -- cgit v1.2.1 From 9f3984b5e8fb261eb24be76ec548d83c43d58b96 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Fri, 25 Sep 2015 18:32:51 +0200 Subject: Rename doc file --- doc/operations/moving_repositories.md | 87 +++++++++++++++++++++++++++++++++ doc/operations/rsyncing_repositories.md | 87 --------------------------------- 2 files changed, 87 insertions(+), 87 deletions(-) create mode 100644 doc/operations/moving_repositories.md delete mode 100644 doc/operations/rsyncing_repositories.md diff --git a/doc/operations/moving_repositories.md b/doc/operations/moving_repositories.md new file mode 100644 index 00000000000..231e09f0462 --- /dev/null +++ b/doc/operations/moving_repositories.md @@ -0,0 +1,87 @@ +# Moving repositories managed by GitLab + +Sometimes you need to move all repositories managed by GitLab to +another filesystem or another server. In this document we will look +at some of the ways you can copy all your repositories from +`/var/opt/gitlab/git-data/repositories` to `/mnt/gitlab/repositories`. + +We will look at three scenarios: the target directory is empty, the +target directory contains an outdated copy of the repositories, and +how to deal with thousands of repositories. + +**Each of the approaches we list can/will overwrite data in the +target directory `/mnt/gitlab/repositories`. Do not mix up the +source and the target.** + +## Target directory is empty: use a tar pipe + +If the target directory `/mnt/gitlab/repositories` is empty the +simplest thing to do is to use a tar pipe. + +``` +# As the git user +tar -C /var/opt/gitlab/git-data/repositories -cf - -- . |\ + tar -C /mnt/gitlab/repositories -xf - +``` + +If you want to see progress, replace `-xf` with `-xvf`. + +### Tar pipe to another server + +You can also use a tar pipe to copy data to another server. If your +'git' user has SSH access to the newserver as 'git@newserver', you +can pipe the data through SSH. 
+ +``` +# As the git user +tar -C /var/opt/gitlab/git-data/repositories -cf - -- . |\ + ssh git@newserver tar -C /mnt/gitlab/repositories -xf - +``` + +If you want to compress the data before it goes over the network +(which will cost you CPU cycles) you can replace `ssh` with `ssh +-C`. + +## The target directory contains an outdated copy of the repositories: use rsync + +In this scenario it is better to use rsync. This utility is either +already installed on your system or easily installable via apt, yum +etc. + +``` +# As the 'git' user +rsync -a --delete /var/opt/gitlab/git-data/repositories/. \ + /mnt/gitlab/repositories +``` + +The `/.` in the command above is very important, without it you can +easily get the wrong directory structure in the target directory. +If you want to see progress, replace `-a` with `-av`. + +### Single rsync to another server + +If the 'git' user on your source system has SSH access to the target +server you can send the repositories over the network with rsync. + +``` +# As the 'git' user +rsync -a --delete /var/opt/gitlab/git-data/repositories/. \ + git@newserver:/mnt/gitlab/repositories +``` + +## Thousands of Git repositories: use one rsync per repository + +Every time you start an rsync job it has to inspect all files in +the source directory, all files in the target directory, and then +decide what files to copy or not. If the source or target directory +has many contents this startup phase of rsync can become a burden +for your GitLab server. In cases like this you can make rsync's +life easier by dividing its work in smaller pieces, and sync one +repository at a time. + +In addition to rsync we will use [GNU +Parallel](http://www.gnu.org/software/parallel/). This utility is +not included in GitLab so you need to install it yourself with apt +or yum. Also note that the GitLab scripts we used below were added +in GitLab 8.???. 
+ diff --git a/doc/operations/rsyncing_repositories.md b/doc/operations/rsyncing_repositories.md deleted file mode 100644 index 231e09f0462..00000000000 --- a/doc/operations/rsyncing_repositories.md +++ /dev/null @@ -1,87 +0,0 @@ -# Moving repositories managed by GitLab - -Sometimes you need to move all repositories managed by GitLab to -another filesystem or another server. In this document we will look -at some of the ways you can copy all your repositories from -`/var/opt/gitlab/git-data/repositories` to `/mnt/gitlab/repositories`. - -We will look at three scenarios: the target directory is empty, the -target directory contains an outdated copy of the repositories, and -how to deal with thousands of repositories. - -**Each of the approaches we list can/will overwrite data in the -target directory `/mnt/gitlab/repositories`. Do not mix up the -source and the target.** - -## Target directory is empty: use a tar pipe - -If the target directory `/mnt/gitlab/repositories` is empty the -simplest thing to do is to use a tar pipe. - -``` -# As the git user -tar -C /var/opt/gitlab/git-data/repositories -cf - -- . |\ - tar -C /mnt/gitlab/repositories -xf - -``` - -If you want to see progress, replace `-xf` with `-xvf`. - -### Tar pipe to another server - -You can also use a tar pipe to copy data to another server. If your -'git' user has SSH access to the newserver as 'git@newserver', you -can pipe the data through SSH. - -``` -# As the git user -tar -C /var/opt/gitlab/git-data/repositories -cf - -- . |\ - ssh git@newserver tar -C /mnt/gitlab/repositories -xf - -``` - -If you want to compress the data before it goes over the network -(which will cost you CPU cycles) you can replace `ssh` with `ssh --C`. - -## The target directory contains an outdated copy of the repositories: use rsync - -In this scenario it is better to use rsync. This utility is either -already installed on your system or easily installable via apt, yum -etc. 
- -``` -# As the 'git' user -rsync -a --delete /var/opt/gitlab/git-data/repositories/. \ - /mnt/gitlab/repositories -``` - -The `/.` in the command above is very important, without it you can -easily get the wrong directory structure in the target directory. -If you want to see progress, replace `-a` with `-av`. - -### Single rsync to another server - -If the 'git' user on your source system has SSH access to the target -server you can send the repositories over the network with rsync. - -``` -# As the 'git' user -rsync -a --delete /var/opt/gitlab/git-data/repositories/. \ - git@newserver:/mnt/gitlab/repositories -``` - -## Thousands of Git repositories: use one rsync per repository - -Every time you start an rsync job it has to inspect all files in -the source directory, all files in the target directory, and then -decide what files to copy or not. If the source or target directory -has many contents this startup phase of rsync can become a burden -for your GitLab server. In cases like this you can make rsync's -life easier by dividing its work in smaller pieces, and sync one -repository at a time. - -In addition to rsync we will use [GNU -Parallel](http://www.gnu.org/software/parallel/). This utility is -not included in GitLab so you need to install it yourself with apt -or yum. Also note that the GitLab scripts we used below were added -in GitLab 8.???. - -- cgit v1.2.1 From 4dd7c2f1e0174f8de6be9c57f7296e64e1534af5 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Fri, 25 Sep 2015 18:35:41 +0200 Subject: Remove unwanted linebreak --- doc/operations/moving_repositories.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/operations/moving_repositories.md b/doc/operations/moving_repositories.md index 231e09f0462..88b90e91316 100644 --- a/doc/operations/moving_repositories.md +++ b/doc/operations/moving_repositories.md @@ -39,8 +39,7 @@ tar -C /var/opt/gitlab/git-data/repositories -cf - -- . 
|\ ``` If you want to compress the data before it goes over the network -(which will cost you CPU cycles) you can replace `ssh` with `ssh --C`. +(which will cost you CPU cycles) you can replace `ssh` with `ssh -C`. ## The target directory contains an outdated copy of the repositories: use rsync -- cgit v1.2.1 From 6479b821ebc04d9e2ec69be451768968c06ce6a5 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Mon, 5 Oct 2015 18:02:12 +0200 Subject: Add RSYNC variable to parallel-rsync-repos --- bin/parallel-rsync-repos | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) mode change 100644 => 100755 bin/parallel-rsync-repos diff --git a/bin/parallel-rsync-repos b/bin/parallel-rsync-repos old mode 100644 new mode 100755 index b2429f743b5..b777056c95f --- a/bin/parallel-rsync-repos +++ b/bin/parallel-rsync-repos @@ -4,6 +4,15 @@ # Example invocation: # find /var/opt/gitlab/git-data/repositories -maxdepth 2 | \ # parallel-rsync-repos /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories +# +# You can also rsync to a remote destination. +# +# parallel-rsync-repos /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories +# +# If you need to pass extra options to rsync, set the RSYNC variable +# +# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos /src dest +# SRC=$1 DEST=$2 @@ -17,10 +26,14 @@ if [ -z "$SRC" ] || [ -z "$DEST" ] ; then exit 1 fi +if [ -z "$RSYNC" ] ; then + RSYNC=rsync +fi + if ! cd $SRC ; then echo "cd $SRC failed" exit 1 fi sed "s|$SRC|./|" |\ - parallel -j$JOBS --progress "mkdir -p $DEST/{} && rsync --delete -a {}/. $DEST/{}/" + parallel -j$JOBS --progress "mkdir -p $DEST/{} && $RSYNC --delete -a {}/. 
$DEST/{}/" -- cgit v1.2.1 From e0ef09d9a35bf001acbb89e4177d942f6db93e50 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Mon, 5 Oct 2015 18:02:32 +0200 Subject: Some more text in the doc --- doc/operations/moving_repositories.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/operations/moving_repositories.md b/doc/operations/moving_repositories.md index 88b90e91316..d156f3ad777 100644 --- a/doc/operations/moving_repositories.md +++ b/doc/operations/moving_repositories.md @@ -16,7 +16,10 @@ source and the target.** ## Target directory is empty: use a tar pipe If the target directory `/mnt/gitlab/repositories` is empty the -simplest thing to do is to use a tar pipe. +simplest thing to do is to use a tar pipe. This method has low +overhead and tar is almost always already installed on your system. +However, it is not possible to resume an interrupted tar pipe: if +that happens then all data must be copied again. ``` # As the git user @@ -43,9 +46,11 @@ If you want to compress the data before it goes over the network ## The target directory contains an outdated copy of the repositories: use rsync -In this scenario it is better to use rsync. This utility is either -already installed on your system or easily installable via apt, yum -etc. +If the target directory already contains a partial / outdated copy +of the repositories it may be wasteful to copy all the data again +with tar. In this scenario it is better to use rsync. This utility +is either already installed on your system or easily installable +via apt, yum etc. 
``` # As the 'git' user -- cgit v1.2.1 From ad37f58ebd97fee3c06a531e4067c8adb4c9ecc7 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 6 Oct 2015 12:42:05 +0200 Subject: Add explanation about parallel-rsync-repos --- doc/operations/moving_repositories.md | 55 ++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/doc/operations/moving_repositories.md b/doc/operations/moving_repositories.md index d156f3ad777..a89602b367f 100644 --- a/doc/operations/moving_repositories.md +++ b/doc/operations/moving_repositories.md @@ -87,5 +87,58 @@ In addition to rsync we will use [GNU Parallel](http://www.gnu.org/software/parallel/). This utility is not included in GitLab so you need to install it yourself with apt or yum. Also note that the GitLab scripts we used below were added -in GitLab 8.???. +in GitLab 8.1. +**This process does not clean up repositories at the target location that no +longer exist at the source.** If you start using your GitLab instance with +`/mnt/gitlab/repositories`, you need to run `gitlab-rake gitlab:cleanup:repos` +after switching to the new repository storage directory. + +### Parallel rsync for all repositories known to GitLab + +This will sync repositories with 10 rsync processes at a time. + +``` +# Omnibus +sudo gitlab-rake gitlab:list_repos |\ + sudo -u git \ + /usr/bin/env JOBS=10 \ + /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \ + /var/opt/gitlab/git-data/repositories \ + /mnt/gitlab/repositories + +# Source +cd /home/git/gitlab +sudo -u git -H bundle exec rake gitlab:list_repos |\ + sudo -u git -H \ + /usr/bin/env JOBS=10 \ + bin/parallel-rsync-repos \ + /home/git/repositories \ + /mnt/gitlab/repositories +``` + +### Parallel rsync only for repositories with recent activity + +Suppose you have already done one sync that started after 2015-10-1 12:00 UTC. +Then you might only want to sync repositories that were changed via GitLab +_after_ that time. 
You can use the 'SINCE' variable to tell 'rake +gitlab:list_repos' to only print repositories with recent activity. + +``` +# Omnibus +sudo gitlab-rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\ + sudo -u git \ + /usr/bin/env JOBS=10 \ + /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \ + /var/opt/gitlab/git-data/repositories \ + /mnt/gitlab/repositories + +# Source +cd /home/git/gitlab +sudo -u git -H bundle exec rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\ + sudo -u git -H \ + /usr/bin/env JOBS=10 \ + bin/parallel-rsync-repos \ + /home/git/repositories \ + /mnt/gitlab/repositories +``` -- cgit v1.2.1 From f3ca92a062424e0cda2c077d9c30a4edbd6bf4c8 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 8 Dec 2015 15:08:22 +0100 Subject: Add 'resume' capability to parallel-rsync-repos --- bin/parallel-rsync-repos | 43 ++++++++++++++++++++---------- doc/operations/moving_repositories.md | 50 ++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 21 deletions(-) diff --git a/bin/parallel-rsync-repos b/bin/parallel-rsync-repos index b777056c95f..21921148fa0 100755 --- a/bin/parallel-rsync-repos +++ b/bin/parallel-rsync-repos @@ -1,29 +1,31 @@ -#!/bin/sh -# this script should run as the 'git' user, not root, because of mkdir +#!/usr/bin/env bash +# this script should run as the 'git' user, not root, because 'root' should not +# own intermediate directories created by rsync. # # Example invocation: # find /var/opt/gitlab/git-data/repositories -maxdepth 2 | \ -# parallel-rsync-repos /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories +# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories /mnt/gitlab/repositories # # You can also rsync to a remote destination. 
# -# parallel-rsync-repos /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories +# parallel-rsync-repos transfer-success.log /var/opt/gitlab/git-data/repositories user@host:/mnt/gitlab/repositories # # If you need to pass extra options to rsync, set the RSYNC variable # -# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos /src dest +# env RSYNC='rsync --rsh="foo bar"' parallel-rsync-repos transfer-success.log /src dest # -SRC=$1 -DEST=$2 +LOGFILE=$1 +SRC=$2 +DEST=$3 -if [ -z "$JOBS" ] ; then - JOBS=10 +if [ -z "$LOGFILE" ] || [ -z "$SRC" ] || [ -z "$DEST" ] ; then + echo "Usage: $0 LOGFILE SRC DEST" + exit 1 fi -if [ -z "$SRC" ] || [ -z "$DEST" ] ; then - echo "Usage: $0 SRC DEST" - exit 1 +if [ -z "$JOBS" ] ; then + JOBS=10 fi if [ -z "$RSYNC" ] ; then @@ -35,5 +37,18 @@ if ! cd $SRC ; then exit 1 fi -sed "s|$SRC|./|" |\ - parallel -j$JOBS --progress "mkdir -p $DEST/{} && $RSYNC --delete -a {}/. $DEST/{}/" +rsyncjob() { + relative_dir="./${1#$SRC}" + + if ! $RSYNC --delete --relative -a "$relative_dir" "$DEST" ; then + echo "rsync $1 failed" + return 1 + fi + + echo "$1" >> $LOGFILE +} + +export LOGFILE SRC DEST RSYNC +export -f rsyncjob + +parallel -j$JOBS --progress rsyncjob diff --git a/doc/operations/moving_repositories.md b/doc/operations/moving_repositories.md index a89602b367f..39086b7a251 100644 --- a/doc/operations/moving_repositories.md +++ b/doc/operations/moving_repositories.md @@ -96,25 +96,59 @@ after switching to the new repository storage directory. ### Parallel rsync for all repositories known to GitLab -This will sync repositories with 10 rsync processes at a time. +This will sync repositories with 10 rsync processes at a time. We keep +track of progress so that the transfer can be restarted if necessary. + +First we create a new directory, owned by 'git', to hold transfer +logs. We assume the directory is empty before we start the transfer +procedure, and that we are the only ones writing files in it. 
``` # Omnibus -sudo gitlab-rake gitlab:list_repos |\ - sudo -u git \ +sudo mkdir /var/opt/gitlab/transfer-logs +sudo chown git:git /var/opt/gitlab/transfer-logs + +# Source +sudo -u git -H mkdir /home/git/transfer-logs +``` + +We seed the process with a list of the directories we want to copy. + +``` +# Omnibus +sudo -u git sh -c 'gitlab-rake gitlab:list_repos > /var/opt/gitlab/transfer-logs/all-repos-$(date +%s).txt' + +# Source +cd /home/git/gitlab +sudo -u git -H sh -c 'bundle exec rake gitlab:list_repos > /home/git/transfer-logs/all-repos-$(date +%s).txt' +``` + +Now we can start the transfer. The command below is idempotent, and +the number of jobs done by GNU Parallel should converge to zero. If it +does not, some repositories listed in all-repos-1234.txt may have been +deleted/renamed before they could be copied. + +``` +# Omnibus +sudo -u git sh -c ' +cat /var/opt/gitlab/transfer-logs/* | sort | uniq -u |\ /usr/bin/env JOBS=10 \ - /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \ + /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \ + /var/opt/gitlab/transfer-logs/success-$(date +%s).log \ /var/opt/gitlab/git-data/repositories \ /mnt/gitlab/repositories +' # Source cd /home/git/gitlab -sudo -u git -H bundle exec rake gitlab:list_repos |\ - sudo -u git -H \ +sudo -u git -H sh -c ' +cat /home/git/transfer-logs/* | sort | uniq -u |\ /usr/bin/env JOBS=10 \ bin/parallel-rsync-repos \ + /home/git/transfer-logs/success-$(date +%s).log \ /home/git/repositories \ /mnt/gitlab/repositories +' ``` ### Parallel rsync only for repositories with recent activity @@ -129,7 +163,8 @@ gitlab:list_repos' to only print repositories with recent activity. 
sudo gitlab-rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\ sudo -u git \ /usr/bin/env JOBS=10 \ - /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repoos \ + /opt/gitlab/embedded/service/gitlab-rails/bin/parallel-rsync-repos \ + /var/opt/gitlab/transfer-logs/success-$(date +%s).log \ /var/opt/gitlab/git-data/repositories \ /mnt/gitlab/repositories @@ -139,6 +174,7 @@ sudo -u git -H bundle exec rake gitlab:list_repos SINCE='2015-10-1 12:00 UTC' |\ sudo -u git -H \ /usr/bin/env JOBS=10 \ bin/parallel-rsync-repos \ + /home/git/transfer-logs/success-$(date +%s).log \ /home/git/repositories \ /mnt/gitlab/repositories ``` -- cgit v1.2.1 From 23f383ef69889c9829ad36afa53b5abfbf4b5511 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Tue, 8 Dec 2015 16:06:06 +0100 Subject: Detect project and namespace changes in list:repos --- lib/tasks/gitlab/list_repos.rake | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/tasks/gitlab/list_repos.rake b/lib/tasks/gitlab/list_repos.rake index 1377e1ea910..c7596e7abcb 100644 --- a/lib/tasks/gitlab/list_repos.rake +++ b/lib/tasks/gitlab/list_repos.rake @@ -3,9 +3,10 @@ namespace :gitlab do scope = Project if ENV['SINCE'] date = Time.parse(ENV['SINCE']) - warn "Listing repositories with activity or changes since #{date}" - project_ids = Project.where(['last_activity_at > ?', date]).pluck(:id) - scope = scope.where(id: project_ids) + warn "Listing repositories with activity or changes since #{date}" + project_ids = Project.where('last_activity_at > ? OR updated_at > ?', date, date).pluck(:id).sort + namespace_ids = Namespace.where(['updated_at > ?', date]).pluck(:id).sort + scope = scope.where('id IN (?) OR namespace_id in (?)', project_ids, namespace_ids) end scope.find_each do |project| base = File.join(Gitlab.config.gitlab_shell.repos_path, project.path_with_namespace) -- cgit v1.2.1