From 102fc37f3b3d213841d4cff47a75d385824a3027 Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Sun, 26 Jun 2005 14:15:16 -0400 Subject: [PATCH] Add git-relink-script to fix up missing hardlinks This will scan 2 or more object repositories and look for common objects, check if they are hardlinked, and replace one with a hardlink to the other if not. This version warns when skipping files because of size differences, and handle more than 2 repositories automatically. Signed-off-by: Ryan Anderson Cheered-on-by: Jeff Garzik Acked-by: Junio C Hamano Signed-off-by: Linus Torvalds --- git-relink-script | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 git-relink-script (limited to 'git-relink-script') diff --git a/git-relink-script b/git-relink-script new file mode 100644 index 0000000000..f6b4f6a2f8 --- /dev/null +++ b/git-relink-script @@ -0,0 +1,173 @@ +#!/usr/bin/env perl +# Copyright 2005, Ryan Anderson +# Distribution permitted under the GPL v2, as distributed +# by the Free Software Foundation. +# Later versions of the GPL at the discretion of Linus Torvalds +# +# Scan two git object-trees, and hardlink any common objects between them. + +use 5.006; +use strict; +use warnings; +use Getopt::Long; + +sub get_canonical_form($); +sub do_scan_directory($$$); +sub compare_two_files($$); +sub usage(); +sub link_two_files($$); + +# stats +my $total_linked = 0; +my $total_already = 0; +my ($linked,$already); + +my $fail_on_different_sizes = 0; +my $help = 0; +GetOptions("safe" => \$fail_on_different_sizes, + "help" => \$help); + +usage() if $help; + +my (@dirs) = @ARGV; + +usage() if (!defined $dirs[0] || !defined $dirs[1]); + +$_ = get_canonical_form($_) foreach (@dirs); + +my $master_dir = pop @dirs; + +opendir(D,$master_dir . "objects/") + or die "Failed to open $master_dir/objects/ : $!"; + +my @hashdirs = grep !/^\.{1,2}$/, readdir(D); + +foreach my $repo (@dirs) { + $linked = 0; + $already = 0; + printf("Searching '%s' and '%s' for common objects and hardlinking them...\n", + $master_dir,$repo); + + foreach my $hashdir (@hashdirs) { + do_scan_directory($master_dir, $hashdir, $repo); + } + + printf("Linked %d files, %d were already linked.\n",$linked, $already); + + $total_linked += $linked; + $total_already += $already; +} + +printf("Totals: Linked %d files, %d were already linked.\n", + $total_linked, $total_already); + + +sub do_scan_directory($$$) { + my ($srcdir, $subdir, $dstdir) = @_; + + my $sfulldir = sprintf("%sobjects/%s/",$srcdir,$subdir); + my $dfulldir = sprintf("%sobjects/%s/",$dstdir,$subdir); + + opendir(S,$sfulldir) + or die "Failed to opendir $sfulldir: $!"; + + foreach my $file (grep(!/\.{1,2}$/, readdir(S))) { + my $sfilename = $sfulldir . $file; + my $dfilename = $dfulldir . $file; + + compare_two_files($sfilename,$dfilename); + + } + closedir(S); +} + +sub compare_two_files($$) { + my ($sfilename, $dfilename) = @_; + + # Perl's stat returns relevant information as follows: + # 0 = dev number + # 1 = inode number + # 7 = size + my @sstatinfo = stat($sfilename); + my @dstatinfo = stat($dfilename); + + if (@sstatinfo == 0 && @dstatinfo == 0) { + die sprintf("Stat of both %s and %s failed: %s\n",$sfilename, $dfilename, $!); + + } elsif (@dstatinfo == 0) { + return; + } + + if ( ($sstatinfo[0] == $dstatinfo[0]) && + ($sstatinfo[1] != $dstatinfo[1])) { + if ($sstatinfo[7] == $dstatinfo[7]) { + link_two_files($sfilename, $dfilename); + + } else { + my $err = sprintf("ERROR: File sizes are not the same, cannot relink %s to %s.\n", + $sfilename, $dfilename); + if ($fail_on_different_sizes) { + die $err; + } else { + warn $err; + } + } + + } elsif ( ($sstatinfo[0] == $dstatinfo[0]) && + ($sstatinfo[1] == $dstatinfo[1])) { + $already++; + } +} + +sub get_canonical_form($) { + my $dir = shift; + my $original = $dir; + + die "$dir is not a directory." unless -d $dir; + + $dir .= "/" unless $dir =~ m#/$#; + $dir .= ".git/" unless $dir =~ m#\.git/$#; + + die "$original does not have a .git/ subdirectory.\n" unless -d $dir; + + return $dir; +} + +sub link_two_files($$) { + my ($sfilename, $dfilename) = @_; + my $tmpdname = sprintf("%s.old",$dfilename); + rename($dfilename,$tmpdname) + or die sprintf("Failure renaming %s to %s: %s", + $dfilename, $tmpdname, $!); + + if (! link($sfilename,$dfilename)) { + my $failtxt = ""; + unless (rename($tmpdname,$dfilename)) { + $failtxt = sprintf( + "Git Repository containing %s is probably corrupted, " . + "please copy '%s' to '%s' to fix.\n", + $tmpdname, $dfilename); + } + + die sprintf("Failed to link %s to %s: %s\n%s" . + $sfilename, $dfilename, + $!, $dfilename, $failtxt); + } + + unlink($tmpdname) + or die sprintf("Unlink of %s failed: %s\n", + $dfilename, $!); + + $linked++; +} + + +sub usage() { + print("Usage: $0 [--safe] [ ...] \n"); + print("All directories should contain a .git/objects/ subdirectory.\n"); + print("Options\n"); + print("\t--safe\t" . + "Stops if two objects with the same hash exist but " . + "have different sizes. Default is to warn and continue.\n"); + exit(1); +} -- cgit v1.2.1