diff options
author | Matthias Urlichs <smurf@smurf.noris.de> | 2005-10-10 11:40:43 +0200 |
---|---|---|
committer | Matthias Urlichs <smurf@smurf.noris.de> | 2005-10-10 11:40:43 +0200 |
commit | eaf718f3ece277462de4e47391e5a965bbbaa297 (patch) | |
tree | 5b68436a0951be41262fe9a9b0426edaa25b5853 /git-svnimport.perl | |
parent | 89764f5d8b0ca8600e3a200e9f863c3c7a3ff5e8 (diff) | |
download | git-eaf718f3ece277462de4e47391e5a965bbbaa297.tar.gz |
New: git-svnimport.
As the name suggests, this script imports from SVN.
Only "normal" SVN repositories (with single trunk/, branches/, and tags/
subdrectories) are supported. Incremental imports require preserving
the file .git/svn2git.
Signed-Off-by: Matthias Urlichs <smurf@smurf.noris.de>
Diffstat (limited to 'git-svnimport.perl')
-rwxr-xr-x | git-svnimport.perl | 671 |
1 files changed, 671 insertions, 0 deletions
diff --git a/git-svnimport.perl b/git-svnimport.perl new file mode 100755 index 0000000000..08645f7ebb --- /dev/null +++ b/git-svnimport.perl @@ -0,0 +1,671 @@ +#!/usr/bin/perl -w + +# This tool is copyright (c) 2005, Matthias Urlichs. +# It is released under the Gnu Public License, version 2. +# +# The basic idea is to pull and analyze SVN changes. +# +# Checking out the files is done by a single long-running CVS connection +# / server process. +# +# The head revision is on branch "origin" by default. +# You can change that with the '-o' option. + +require v5.8.0; # for shell-safe open("-|",LIST) +use strict; +use warnings; +use Getopt::Std; +use File::Spec; +use File::Temp qw(tempfile); +use File::Path qw(mkpath); +use File::Basename qw(basename dirname); +use Time::Local; +use IO::Pipe; +use POSIX qw(strftime dup2); +use IPC::Open2; +use SVN::Core; +use SVN::Ra; + +$SIG{'PIPE'}="IGNORE"; +$ENV{'TZ'}="UTC"; + +our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b); + +sub usage() { + print STDERR <<END; +Usage: ${\basename $0} # fetch/update GIT from CVS + [-o branch-for-HEAD] [-h] [-v] + [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname] + [-i] [-u] [-s subst] [-m] [-M regex] [SVN_URL] +END + exit(1); +} + +getopts("b:C:hivmM:o:t:T:u") or usage(); +usage if $opt_h; + +my $tag_name = $opt_t || "tags"; +my $trunk_name = $opt_T || "trunk"; +my $branch_name = $opt_b || "branches"; + +@ARGV <= 1 or usage(); + +$opt_o ||= "origin"; +my $git_tree = $opt_C; +$git_tree ||= "."; + +my $cvs_tree; +if ($#ARGV == 0) { + $cvs_tree = $ARGV[0]; +} elsif (-f 'CVS/Repository') { + open my $f, '<', 'CVS/Repository' or + die 'Failed to open CVS/Repository'; + $cvs_tree = <$f>; + chomp $cvs_tree; + close $f; +} else { + usage(); +} + +our @mergerx = (); +if ($opt_m) { + @mergerx = ( qr/\W(?:from|of|merge|merging|merged) (\w+)/i ); +} +if ($opt_M) { + push (@mergerx, qr/$opt_M/); +} + +select(STDERR); $|=1; select(STDOUT); + + +package SVNconn; +# Basic SVN connection. +# We're only interested in connecting and downloading, so ... + +use File::Spec; +use File::Temp qw(tempfile); +use POSIX qw(strftime dup2); + +sub new { + my($what,$repo) = @_; + $what=ref($what) if ref($what); + + my $self = {}; + $self->{'buffer'} = ""; + bless($self,$what); + + $repo =~ s#/+$##; + $self->{'fullrep'} = $repo; + $self->conn(); + + $self->{'lines'} = undef; + + return $self; +} + +sub conn { + my $self = shift; + my $repo = $self->{'fullrep'}; + my $s = SVN::Ra->new($repo); + + die "SVN connection to $repo: $!\n" unless defined $s; + $self->{'svn'} = $s; + $self->{'repo'} = $repo; + $self->{'maxrev'} = $s->get_latest_revnum(); +} + +sub file { + my($self,$path,$rev) = @_; + my $res; + + my ($fh, $name) = tempfile('gitsvn.XXXXXX', + DIR => File::Spec->tmpdir(), UNLINK => 1); + + $self->{'svn'}->get_file($path,$rev,$fh) or do { + # retry + $self->conn(); + $self->{'svn'}->get_file($path,$rev,$fh) + or die "$rev: No file $path at $rev\n"; + }; + close ($fh); + + return ($name, $res); +} + + +package main; + +my $svn = SVNconn->new($cvs_tree); + + +sub pdate($) { + my($d) = @_; + $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)# + or die "Unparseable date: $d\n"; + my $y=$1; $y-=1900 if $y>1900; + return timegm($6||0,$5,$4,$3,$2-1,$y); +} + +sub pmode($) { + my($mode) = @_; + my $m = 0; + my $mm = 0; + my $um = 0; + for my $x(split(//,$mode)) { + if($x eq ",") { + $m |= $mm&$um; + $mm = 0; + $um = 0; + } elsif($x eq "u") { $um |= 0700; + } elsif($x eq "g") { $um |= 0070; + } elsif($x eq "o") { $um |= 0007; + } elsif($x eq "r") { $mm |= 0444; + } elsif($x eq "w") { $mm |= 0222; + } elsif($x eq "x") { $mm |= 0111; + } elsif($x eq "=") { # do nothing + } else { die "Unknown mode: $mode\n"; + } + } + $m |= $mm&$um; + return $m; +} + +sub getwd() { + my $pwd = `pwd`; + chomp $pwd; + return $pwd; +} + + +sub get_headref($$) { + my $name = shift; + my $git_dir = shift; + my $sha; + + if (open(C,"$git_dir/refs/heads/$name")) { + chomp($sha = <C>); + close(C); + length($sha) == 40 + or die "Cannot get head id for $name ($sha): $!\n"; + } + return $sha; +} + + +-d $git_tree + or mkdir($git_tree,0777) + or die "Could not create $git_tree: $!"; +chdir($git_tree); + +my $orig_branch = ""; +my $forward_master = 0; +my %branches; + +my $git_dir = $ENV{"GIT_DIR"} || ".git"; +$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#; +$ENV{"GIT_DIR"} = $git_dir; +my $orig_git_index; +$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE}; +my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx', + DIR => File::Spec->tmpdir()); +close ($git_ih); +$ENV{GIT_INDEX_FILE} = $git_index; +my $maxnum = 0; +my $last_rev = ""; +my $last_branch; +my $current_rev = 0; +unless(-d $git_dir) { + system("git-init-db"); + die "Cannot init the GIT db at $git_tree: $?\n" if $?; + system("git-read-tree"); + die "Cannot init an empty tree: $?\n" if $?; + + $last_branch = $opt_o; + $orig_branch = ""; +} else { + -f "$git_dir/refs/heads/$opt_o" + or die "Branch '$opt_o' does not exist.\n". + "Either use the correct '-o branch' option,\n". + "or import to a new repository.\n"; + + -f "$git_dir/svn2git" + or die "'$git_dir/svn2git' does not exist.\n". + "You need that file for incremental imports.\n"; + $last_branch = basename(readlink("$git_dir/HEAD")); + unless($last_branch) { + warn "Cannot read the last branch name: $! -- assuming 'master'\n"; + $last_branch = "master"; + } + $orig_branch = $last_branch; + $last_rev = get_headref($orig_branch, $git_dir); + if (-f "$git_dir/SVN2GIT_HEAD") { + die <<EOM; +SVN2GIT_HEAD exists. +Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD. +You may need to run + + git-read-tree -m -u SVN2GIT_HEAD HEAD +EOM + } + system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD"); + + $forward_master = + $opt_o ne 'master' && -f "$git_dir/refs/heads/master" && + system('cmp', '-s', "$git_dir/refs/heads/master", + "$git_dir/refs/heads/$opt_o") == 0; + + # populate index + system('git-read-tree', $last_rev); + die "read-tree failed: $?\n" if $?; + + # Get the last import timestamps + open my $B,"<", "$git_dir/svn2git"; + while(<$B>) { + chomp; + my($num,$branch,$ref) = split; + $branches{$branch}{$num} = $ref; + $branches{$branch}{"LAST"} = $ref; + $current_rev = $num+1 if $current_rev < $num+1; + } + close($B); +} +-d $git_dir + or die "Could not create git subdir ($git_dir).\n"; + +open BRANCHES,">>", "$git_dir/svn2git"; + + +## cvsps output: +#--------------------- +#PatchSet 314 +#Date: 1999/09/18 13:03:59 +#Author: wkoch +#Branch: STABLE-BRANCH-1-0 +#Ancestor branch: HEAD +#Tag: (none) +#Log: +# See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch +#Members: +# README:1.57->1.57.2.1 +# VERSION:1.96->1.96.2.1 +# +#--------------------- + +my $state = 0; + +sub get_file($$$) { + my($rev,$branch,$path) = @_; + + # revert split_path(), below + my $svnpath; + $path = "" if $path eq "/"; # this should not happen, but ... + if($branch eq "/") { + $svnpath = "/$trunk_name/$path"; + } elsif($branch =~ m#^/#) { + $svnpath = "/$tag_name$branch/$path"; + } else { + $svnpath = "/$branch_name/$branch/$path"; + } + + # now get it + my ($name, $res) = $svn->file($svnpath,$rev); + + open my $F, '-|', "git-hash-object -w $name" + or die "Cannot create object: $!\n"; + my $sha = <$F>; + chomp $sha; + close $F; + # my $mode = pmode($cvs->{'mode'}); + my $mode = "0644"; # SV does not seem to store any file modes + return [$mode, $sha, $path]; +} + +sub split_path($$) { + my($rev,$path) = @_; + my $branch; + + if($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) { + $branch = "/$1"; + } elsif($path =~ s#^/\Q$trunk_name\E/?##) { + $branch = "/"; + } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) { + $branch = $1; + } else { + print STDERR "$rev: Unrecognized path: $path\n"; + return () + } + $path = "/" if $path eq ""; + return ($branch,$path); +} + +sub commit { + my($branch, $changed_paths, $revision, $author, $date, $message) = @_; + my($author_name,$author_email,$dest); + my(@old,@new); + + if ($author =~ /^(.*?)\s+<(.*)>$/) { + ($author_name, $author_email) = ($1, $2); + } else { + $author =~ s/^<(.*)>$/$1/; + $author_name = $author_email = $author; + } + $date = pdate($date); + + my $tag; + my $parent; + if($branch eq "/") { # trunk + $parent = $opt_o; + } elsif($branch =~ m#^/(.+)#) { # tag + $tag = 1; + $parent = $1; + } else { # "normal" branch + # nothing to do + $parent = $branch; + } + $dest = $parent; + + my $prev = $changed_paths->{"/"}; + if($prev and $prev->action eq "A") { + delete $changed_paths->{"/"}; + my $oldpath = $prev->copyfrom_path; + my $rev; + if(defined $oldpath) { + my $p; + ($parent,$p) = split_path($revision,$oldpath); + if($parent eq "/") { + $parent = $opt_o; + } else { + $parent =~ s#^/##; # if it's a tag + } + } else { + $parent = undef; + } + } + + my $rev; + if(defined $parent) { + open(H,"git-rev-parse --verify $parent |"); + $rev = <H>; + close(H) or do { + print STDERR "$revision: cannot find commit '$parent'!\n"; + return; + }; + chop $rev; + if(length($rev) != 40) { + print STDERR "$revision: cannot find commit '$parent'!\n"; + return; + } + $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"}; + if($revision != 1 and not $rev) { + print STDERR "$revision: do not know ancestor for '$parent'!\n"; + return; + } + } else { + $rev = undef; + } + +# if($prev and $prev->action eq "A") { +# if(not $tag) { +# unless(open(H,"> $git_dir/refs/heads/$branch")) { +# print STDERR "$revision: Could not create branch $branch: $!\n"; +# $state=11; +# next; +# } +# print H "$rev\n" +# or die "Could not write branch $branch: $!"; +# close(H) +# or die "Could not write branch $branch: $!"; +# } +# } + if(not defined $rev) { + unlink($git_index); + } elsif ($rev ne $last_rev) { + print "Switching from $last_rev to $rev ($branch)\n" if $opt_v; + system("git-read-tree", $rev); + die "read-tree failed for $rev: $?\n" if $?; + $last_rev = $rev; + } + + while(my($path,$action) = each %$changed_paths) { + if ($action->action eq "A") { + my $f = get_file($revision,$branch,$path); + push(@new,$f) if $f; + } elsif ($action->action eq "D") { + push(@old,$path); + } elsif ($action->action eq "M") { + my $f = get_file($revision,$branch,$path); + push(@new,$f) if $f; + } elsif ($action->action eq "R") { + # refer to a file/tree in an earlier commit + push(@old,$path); # remove any old stuff + + # ... and add any new stuff + my($b,$p) = split_path($revision,$action->oldpath); + open my $F,"-|","git-ls-tree","-r","-z", $branches{$b}{$action->oldrev}, $p; + local $/ = '\0'; + while(<$F>) { + chomp; + my($m,$p) = split(/\t/,$_,2); + my($mode,$type,$sha1) = split(/ /,$m); + next if $type ne "blob"; + push(@new,[$mode,$sha1,$p]); + } + } else { + die "$revision: unknown action '".$action->action."' for $path\n"; + } + } + + if(@old) { + open F, "-│", "git-ls-files", "-z", @old or die $!; + @old = (); + local $/ = '\0'; + while(<F>) { + chomp; + push(@old,$_); + } + close(F); + + while(@old) { + my @o2; + if(@old > 55) { + @o2 = splice(@old,0,50); + } else { + @o2 = @old; + @old = (); + } + system("git-update-index","--force-remove","--",@o2); + die "Cannot remove files: $?\n" if $?; + } + } + while(@new) { + my @n2; + if(@new > 12) { + @n2 = splice(@new,0,10); + } else { + @n2 = @new; + @new = (); + } + system("git-update-index","--add", + (map { ('--cacheinfo', @$_) } @n2)); + die "Cannot add files: $?\n" if $?; + } + + my $pid = open(C,"-|"); + die "Cannot fork: $!" unless defined $pid; + unless($pid) { + exec("git-write-tree"); + die "Cannot exec git-write-tree: $!\n"; + } + chomp(my $tree = <C>); + length($tree) == 40 + or die "Cannot get tree id ($tree): $!\n"; + close(C) + or die "Error running git-write-tree: $?\n"; + print "Tree ID $tree\n" if $opt_v; + + my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n"; + my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n"; + $pid = fork(); + die "Fork: $!\n" unless defined $pid; + unless($pid) { + $pr->writer(); + $pw->reader(); + open(OUT,">&STDOUT"); + dup2($pw->fileno(),0); + dup2($pr->fileno(),1); + $pr->close(); + $pw->close(); + + my @par = (); + @par = ("-p",$rev) if defined $rev; + + # loose detection of merges + # based on the commit msg + foreach my $rx (@mergerx) { + if ($message =~ $rx) { + my $mparent = $1; + if ($mparent eq 'HEAD') { $mparent = $opt_o }; + if ( -e "$git_dir/refs/heads/$mparent") { + $mparent = get_headref($mparent, $git_dir); + push @par, '-p', $mparent; + print OUT "Merge parent branch: $mparent\n" if $opt_v; + } + } + } + + exec("env", + "GIT_AUTHOR_NAME=$author_name", + "GIT_AUTHOR_EMAIL=$author_email", + "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), + "GIT_COMMITTER_NAME=$author_name", + "GIT_COMMITTER_EMAIL=$author_email", + "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), + "git-commit-tree", $tree,@par); + die "Cannot exec git-commit-tree: $!\n"; + } + $pw->writer(); + $pr->reader(); + + $message =~ s/[\s\n]+\z//; + + print $pw "$message\n" + or die "Error writing to git-commit-tree: $!\n"; + $pw->close(); + + print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v; + chomp(my $cid = <$pr>); + length($cid) == 40 + or die "Cannot get commit id ($cid): $!\n"; + print "Commit ID $cid\n" if $opt_v; + $pr->close(); + + waitpid($pid,0); + die "Error running git-commit-tree: $?\n" if $?; + + if(defined $dest) { + print "Writing to refs/heads/$dest\n" if $opt_v; + open(C,">$git_dir/refs/heads/$dest") and + print C ("$cid\n") and + close(C) + or die "Cannot write branch $dest for update: $!\n"; + } else { + print "... no known parent\n" if $opt_v; + } + $branches{$branch}{"LAST"} = $cid; + $branches{$branch}{$revision} = $cid; + $last_rev = $cid; + print BRANCHES "$revision $branch $cid\n"; + print "DONE: $revision $dest $cid\n" if $opt_v; + + if($tag) { + my($in, $out) = ('',''); + $last_rev = "-" if %$changed_paths; + # the tag was 'complex', i.e. did not refer to a "real" revision + + $tag =~ tr/_/\./ if $opt_u; + + my $pid = open2($in, $out, 'git-mktag'); + print $out ("object $cid\n". + "type commit\n". + "tag $tag\n". + "tagger $author_name <$author_email>\n") and + close($out) + or die "Cannot create tag object $tag: $!\n"; + + my $tagobj = <$in>; + chomp $tagobj; + + if ( !close($in) or waitpid($pid, 0) != $pid or + $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) { + die "Cannot create tag object $tag: $!\n"; + } + + + open(C,">$git_dir/refs/tags/$tag") + or die "Cannot create tag $tag: $!\n"; + print C "$tagobj\n" + or die "Cannot write tag $tag: $!\n"; + close(C) + or die "Cannot write tag $tag: $!\n"; + + print "Created tag '$tag' on '$branch'\n" if $opt_v; + } +} + +my ($changed_paths, $revision, $author, $date, $message, $pool) = @_; +sub _commit_all { + ($changed_paths, $revision, $author, $date, $message, $pool) = @_; +} +sub commit_all { + my %done; + my @col; + my $pref; + my $branch; + + while(my($path,$action) = each %$changed_paths) { + ($branch,$path) = split_path($revision,$path); + next if not defined $branch; + $done{$branch}{$path} = $action; + } + while(($branch,$changed_paths) = each %done) { + commit($branch, $changed_paths, $revision, $author, $date, $message); + } +} + +while(++$current_rev < $svn->{'maxrev'}) { + $svn->{'svn'}->get_log("/",$current_rev,$current_rev,$current_rev,1,1,\&_commit_all,""); + commit_all(); +} + + +unlink($git_index); + +if (defined $orig_git_index) { + $ENV{GIT_INDEX_FILE} = $orig_git_index; +} else { + delete $ENV{GIT_INDEX_FILE}; +} + +# Now switch back to the branch we were in before all of this happened +if($orig_branch) { + print "DONE\n" if $opt_v; + system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") + if $forward_master; + unless ($opt_i) { + system('git-read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD'); + die "read-tree failed: $?\n" if $?; + } +} else { + $orig_branch = "master"; + print "DONE; creating $orig_branch branch\n" if $opt_v; + system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") + unless -f "$git_dir/refs/heads/master"; + unlink("$git_dir/HEAD"); + symlink("refs/heads/$orig_branch","$git_dir/HEAD"); + unless ($opt_i) { + system('git checkout'); + die "checkout failed: $?\n" if $?; + } +} +unlink("$git_dir/SVN2GIT_HEAD"); +close(BRANCHES); |