summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jeff King <peff@peff.net> 2007-09-25 15:29:42 -0400
committer: Junio C Hamano <gitster@pobox.com> 2007-10-02 21:02:03 -0700
commit: eede7b7d110e2c354235d7a3f6c8f1644b5120e5 (patch)
tree: 29288dc52049b52e81eeee8dc61b77c458a8c36c
parent: 2ff5e18a930ddaf03c77a60e52648e7b8b20fc8d (diff)
download: git-eede7b7d110e2c354235d7a3f6c8f1644b5120e5.tar.gz
diffcore-rename: cache file deltas
We find rename candidates by computing a fingerprint hash of each file, and then comparing those fingerprints. There are inherently O(n^2) comparisons, so it pays in CPU time to hoist the (rather expensive) computation of the fingerprint out of that loop (or to cache it after computing it the first time).

Previously, we didn't keep the filespec information around because then we had the potential to consume a great deal of memory. However, instead of keeping all of the filespec data, we can just keep the fingerprint.

This patch implements and uses diff_free_filespec_data_large to accomplish that goal. We also have to change estimate_similarity not to needlessly repopulate the filespec data when we already have the hash.

Practical tests showed 4.5x speedup for a 10% memory usage increase.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- diff.c 7
-rw-r--r-- diffcore-rename.c 7
-rw-r--r-- diffcore.h 1
3 files changed, 11 insertions, 4 deletions
diff --git a/diff.c b/diff.c
index 0ee9ea1c1b..35e3c61986 100644
--- a/diff.c
+++ b/diff.c
@@ -1675,7 +1675,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
return 0;
}
-void diff_free_filespec_data(struct diff_filespec *s)
+void diff_free_filespec_data_large(struct diff_filespec *s)
{
if (s->should_free)
free(s->data);
@@ -1686,6 +1686,11 @@ void diff_free_filespec_data(struct diff_filespec *s)
s->should_free = s->should_munmap = 0;
s->data = NULL;
}
+}
+
+void diff_free_filespec_data(struct diff_filespec *s)
+{
+ diff_free_filespec_data_large(s);
free(s->cnt_data);
s->cnt_data = NULL;
}
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 41b35c3a9e..4fc200064a 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -184,7 +184,8 @@ static int estimate_similarity(struct diff_filespec *src,
if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
return 0;
- if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+ if ((!src->cnt_data && diff_populate_filespec(src, 0))
+ || (!dst->cnt_data && diff_populate_filespec(dst, 0)))
return 0; /* error but caught downstream */
@@ -377,10 +378,10 @@ void diffcore_rename(struct diff_options *options)
m->score = estimate_similarity(one, two,
minimum_score);
m->name_score = basename_same(one, two);
- diff_free_filespec_data(one);
+ diff_free_filespec_data_large(one);
}
/* We do not need the text anymore */
- diff_free_filespec_data(two);
+ diff_free_filespec_data_large(two);
dst_cnt++;
}
/* cost matrix sorted by most to least similar pair */
diff --git a/diffcore.h b/diffcore.h
index eef17c4ca2..4bf175bda9 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -48,6 +48,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *,
extern int diff_populate_filespec(struct diff_filespec *, int);
extern void diff_free_filespec_data(struct diff_filespec *);
+extern void diff_free_filespec_data_large(struct diff_filespec *);
extern int diff_filespec_is_binary(struct diff_filespec *);
struct diff_filepair {