summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2006-03-02 22:11:25 -0800
committerJunio C Hamano <junkio@cox.net>2006-03-02 22:12:33 -0800
commit1706306a54cfb5f1bf65f2b054aab2a5a7dba8e7 (patch)
treebdbcdf6d3b00771cd7d0b8c01d421bef47b30cdc
parente29e1147e485654d90a0ea0fd5fb7151bb194265 (diff)
downloadgit-1706306a54cfb5f1bf65f2b054aab2a5a7dba8e7.tar.gz
diffcore-rename: similarity estimator fix.
The "similarity" logic was giving added material far too much negative weight. What we want to know is how similar the post-change image is to the pre-change image, so the natural definition of similarity is how much material the two have in common, relative to the size of the post-change image. This simplifies things a lot. Signed-off-by: Junio C Hamano <junkio@cox.net>
-rw-r--r--diffcore-rename.c20
1 files changed, 8 insertions, 12 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 55cf1c37f3..625b589fb7 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -170,19 +170,15 @@ static int estimate_similarity(struct diff_filespec *src,
&src_copied, &literal_added))
return 0;
- /* Extent of damage */
- if (src->size + literal_added < src_copied)
- delta_size = 0;
- else
- delta_size = (src->size - src_copied) + literal_added;
-
- /*
- * Now we will give some score to it. 100% edit gets 0 points
- * and 0% edit gets MAX_SCORE points.
+ /* How similar are they?
+ * what percentage of material in dst are from source?
*/
- score = MAX_SCORE - (MAX_SCORE * delta_size / base_size);
- if (score < 0) return 0;
- if (MAX_SCORE < score) return MAX_SCORE;
+ if (dst->size < src_copied)
+ score = MAX_SCORE;
+ else if (!dst->size)
+ score = 0; /* should not happen */
+ else
+ score = src_copied * MAX_SCORE / dst->size;
return score;
}