summaryrefslogtreecommitdiff
path: root/perl/Git/SVN.pm
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2014-04-16 23:54:05 -0700
committerEric Wong <normalperson@yhbt.net>2014-10-24 22:55:26 +0000
commitabfef3bbf5f637c86032763632393ce1ffd23ccc (patch)
tree4138507c3c04e5692cdce4586ee077180c77e3b9 /perl/Git/SVN.pm
parentfbecd99861ea5795aeba46faf2ac7a8c1b70d485 (diff)
downloadgit-abfef3bbf5f637c86032763632393ce1ffd23ccc.tar.gz
git-svn: only look at the new parts of svn:mergeinfo
In a Subversion repository where many feature branches are merged into a trunk, the svn:mergeinfo property can grow very large. This severely slows down git-svn's make_log_entry() because it is checking all mergeinfo entries every time the property changes. In most cases, the additions to svn:mergeinfo since the last commit are pretty small, and there is nothing to gain by checking merges that were already checked for the last commit in the branch. Add a mergeinfo_changes() function which computes the set of interesting changes to svn:mergeinfo since the last commit. Filter out merged branches whose ranges haven't changed, and remove a common prefix of ranges from other merged branches. This speeds up "git svn fetch" by several orders of magnitude on a large repository where thousands of feature branches have been merged. Signed-off-by: Jakob Stoklund Olesen <stoklund@2pi.dk> Signed-off-by: Eric Wong <normalperson@yhbt.net>
Diffstat (limited to 'perl/Git/SVN.pm')
-rw-r--r--perl/Git/SVN.pm84
1 files changed, 72 insertions, 12 deletions
diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index 09cff135ef..abd51eadd2 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -1178,7 +1178,7 @@ sub find_parent_branch {
or die "SVN connection failed somewhere...\n";
}
print STDERR "Successfully followed parent\n" unless $::_q > 1;
- return $self->make_log_entry($rev, [$parent], $ed);
+ return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
}
return undef;
}
@@ -1210,7 +1210,7 @@ sub do_fetch {
unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
die "SVN connection failed somewhere...\n";
}
- $self->make_log_entry($rev, \@parents, $ed);
+ $self->make_log_entry($rev, \@parents, $ed, $last_rev);
}
sub mkemptydirs {
@@ -1478,9 +1478,9 @@ sub find_extra_svk_parents {
sub lookup_svn_merge {
my $uuid = shift;
my $url = shift;
- my $merge = shift;
+ my $source = shift;
+ my $revs = shift;
- my ($source, $revs) = split ":", $merge;
my $path = $source;
$path =~ s{^/}{};
my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
@@ -1702,6 +1702,62 @@ sub parents_exclude {
return @excluded;
}
+# Compute what's new in svn:mergeinfo.
+sub mergeinfo_changes {
+ my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
+ my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
+ my $old_minfo = {};
+
+ # Initialize cache on the first call.
+ unless (defined $self->{cached_mergeinfo_rev}) {
+ $self->{cached_mergeinfo_rev} = {};
+ $self->{cached_mergeinfo} = {};
+ }
+
+ my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path};
+ if (defined $cached_rev && $cached_rev == $old_rev) {
+ $old_minfo = $self->{cached_mergeinfo}{$old_path};
+ } else {
+ my $ra = $self->ra;
+ # Give up if $old_path isn't in the repo.
+ # This is probably a merge on a subtree.
+ if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
+ warn "W: ignoring svn:mergeinfo on $old_path, ",
+ "directory didn't exist in r$old_rev\n";
+ return {};
+ }
+ my (undef, undef, $props) =
+ $self->ra->get_dir($old_path, $old_rev);
+ if (defined $props->{"svn:mergeinfo"}) {
+ my %omi = map {split ":", $_ } split "\n",
+ $props->{"svn:mergeinfo"};
+ $old_minfo = \%omi;
+ }
+ $self->{cached_mergeinfo}{$old_path} = $old_minfo;
+ $self->{cached_mergeinfo_rev}{$old_path} = $old_rev;
+ }
+
+ # Cache the new mergeinfo.
+ $self->{cached_mergeinfo}{$path} = \%minfo;
+ $self->{cached_mergeinfo_rev}{$path} = $rev;
+
+ my %changes = ();
+ foreach my $p (keys %minfo) {
+ my $a = $old_minfo->{$p} || "";
+ my $b = $minfo{$p};
+ # Omit merged branches whose ranges lists are unchanged.
+ next if $a eq $b;
+ # Remove any common range list prefix.
+ ($a ^ $b) =~ /^[\0]*/;
+ my $common_prefix = rindex $b, ",", $+[0] - 1;
+ $changes{$p} = substr $b, $common_prefix + 1;
+ }
+ print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
+ scalar(keys %minfo), " sources, ",
+ scalar(keys %changes), " changed\n";
+
+ return \%changes;
+}
# note: this function should only be called if the various dirprops
# have actually changed
@@ -1715,14 +1771,15 @@ sub find_extra_svn_parents {
# history. Then, we figure out which git revisions are in
# that tip, but not this revision. If all of those revisions
# are now marked as merge, we can add the tip as a parent.
- my @merges = split "\n", $mergeinfo;
+ my @merges = sort keys %$mergeinfo;
my @merge_tips;
my $url = $self->url;
my $uuid = $self->ra_uuid;
my @all_ranges;
for my $merge ( @merges ) {
my ($tip_commit, @ranges) =
- lookup_svn_merge( $uuid, $url, $merge );
+ lookup_svn_merge( $uuid, $url,
+ $merge, $mergeinfo->{$merge} );
unless (!$tip_commit or
grep { $_ eq $tip_commit } @$parents ) {
push @merge_tips, $tip_commit;
@@ -1738,8 +1795,9 @@ sub find_extra_svn_parents {
# check merge tips for new parents
my @new_parents;
for my $merge_tip ( @merge_tips ) {
- my $spec = shift @merges;
+ my $merge = shift @merges;
next unless $merge_tip and $excluded{$merge_tip};
+ my $spec = "$merge:$mergeinfo->{$merge}";
# check out 'new' tips
my $merge_base;
@@ -1770,7 +1828,7 @@ sub find_extra_svn_parents {
.@incomplete." commit(s) (eg $incomplete[0])\n";
} else {
warn
- "Found merge parent (svn:mergeinfo prop): ",
+ "Found merge parent ($spec): ",
$merge_tip, "\n";
push @new_parents, $merge_tip;
}
@@ -1797,7 +1855,7 @@ sub find_extra_svn_parents {
}
sub make_log_entry {
- my ($self, $rev, $parents, $ed) = @_;
+ my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
my $untracked = $self->get_untracked($ed);
my @parents = @$parents;
@@ -1809,10 +1867,12 @@ sub make_log_entry {
($ed, $props->{"svk:merge"}, \@parents);
}
if ( $props->{"svn:mergeinfo"} ) {
+ my $mi_changes = $self->mergeinfo_changes
+ ($parent_path || $path, $parent_rev,
+ $path, $rev,
+ $props->{"svn:mergeinfo"});
$self->find_extra_svn_parents
- ($ed,
- $props->{"svn:mergeinfo"},
- \@parents);
+ ($ed, $mi_changes, \@parents);
}
}