diff options
author | Junio C Hamano <junkio@cox.net> | 2005-05-22 21:26:09 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-05-23 11:49:30 -0700 |
commit | f7c1512af8ff4f821c530f9a4bc8f8ff25733d51 (patch) | |
tree | 78713f9ae744a9c5331298787d4c74f958e2e90f /diff.c | |
parent | 60896c7bfed67f1c7364595213ef9239642f83c5 (diff) | |
download | git-f7c1512af8ff4f821c530f9a4bc8f8ff25733d51.tar.gz |
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'diff.c')
-rw-r--r-- | diff.c | 145 |
1 files changed, 98 insertions, 47 deletions
@@ -283,12 +283,6 @@ int diff_populate_filespec(struct diff_filespec *s) return 0; } -void diff_free_filepair(struct diff_filepair *p) -{ - free(p->xfrm_msg); - free(p); -} - void diff_free_filespec_data(struct diff_filespec *s) { if (s->should_free) @@ -501,9 +495,9 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue, struct diff_filepair *dp = xmalloc(sizeof(*dp)); dp->one = one; dp->two = two; - dp->xfrm_msg = NULL; + dp->score = 0; dp->orig_order = queue->nr; - dp->xfrm_work = 0; + dp->rename_rank = 0; diff_q(queue, dp); return dp; } @@ -522,23 +516,7 @@ static void diff_flush_raw(struct diff_filepair *p) p->two->path, line_termination); } -static void diff_flush_patch(struct diff_filepair *p) -{ - const char *name, *other; - - name = p->one->path; - other = (strcmp(name, p->two->path) ? p->two->path : NULL); - if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || - (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) - return; /* no tree diffs in patch format */ - - if (DIFF_PAIR_UNMERGED(p)) - run_external_diff(name, NULL, NULL, NULL, NULL); - else - run_external_diff(name, other, p->one, p->two, p->xfrm_msg); -} - -static int uninteresting(struct diff_filepair *p) +int diff_unmodified_pair(struct diff_filepair *p) { /* This function is written stricter than necessary to support * the currently implemented transformers, but the idea is to @@ -570,12 +548,70 @@ static int uninteresting(struct diff_filepair *p) return 0; } +static void diff_flush_patch(struct diff_filepair *p, const char *msg) +{ + const char *name, *other; + + /* diffcore_prune() keeps "stay" entries for diff-raw + * copy/rename detection, but when we are generating + * patches we do not need them. + */ + if (diff_unmodified_pair(p)) + return; + + name = p->one->path; + other = (strcmp(name, p->two->path) ? p->two->path : NULL); + if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) || + (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode))) + return; /* no tree diffs in patch format */ + + if (DIFF_PAIR_UNMERGED(p)) + run_external_diff(name, NULL, NULL, NULL, NULL); + else + run_external_diff(name, other, p->one, p->two, msg); +} + +int diff_needs_to_stay(struct diff_queue_struct *q, int i, + struct diff_filespec *it) +{ + /* If it will be used in later entry (either stay or used + * as the source of rename/copy), we need to copy, not rename. + */ + while (i < q->nr) { + struct diff_filepair *p = q->queue[i++]; + if (!DIFF_FILE_VALID(p->two)) + continue; /* removed is fine */ + if (strcmp(p->one->path, it->path)) + continue; /* not relevant */ + + /* p has its src set to *it and it is not a delete; + * it will be used for in-place change, rename/copy, + * or just stays there. We cannot rename it out. + */ + return 1; + } + return 0; +} + +static int diff_used_as_source(struct diff_queue_struct *q, int lim, + struct diff_filespec *it) +{ + int i; + for (i = 0; i < lim; i++) { + struct diff_filepair *p = q->queue[i++]; + if (!strcmp(p->one->path, it->path)) + return 1; + } + return 0; +} + void diffcore_prune(void) { /* * Although rename/copy detection wants to have "no-change" * entries fed into them, the downstream do not need to see - * them. This function removes such entries. + * them, unless we had rename/copy for the same path earlier. + * This function removes such entries. * * The applications that use rename/copy should: * @@ -585,6 +621,7 @@ void diffcore_prune(void) * (3) call diffcore_prune * (4) call other diffcore_xxx that do not need to see * "no-change" entries. + * (5) call diff_flush(). */ struct diff_queue_struct *q = &diff_queued_diff; struct diff_queue_struct outq; @@ -595,22 +632,21 @@ void diffcore_prune(void) for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; - if (!uninteresting(p)) + if (!diff_unmodified_pair(p) || + diff_used_as_source(q, i, p->one)) diff_q(&outq, p); else - diff_free_filepair(p); + free(p); } free(q->queue); *q = outq; return; } -static void diff_flush_one(struct diff_filepair *p) +static void diff_flush_one(struct diff_filepair *p, const char *msg) { - if (uninteresting(p)) - return; if (generate_patch) - diff_flush_patch(p); + diff_flush_patch(p, msg); else diff_flush_raw(p); } @@ -618,14 +654,7 @@ static void diff_flush_one(struct diff_filepair *p) int diff_queue_is_empty(void) { struct diff_queue_struct *q = &diff_queued_diff; - int i; - - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - if (!uninteresting(p)) - return 0; - } - return 1; + return q->nr == 0; } void diff_flush(int diff_output_style) @@ -646,13 +675,35 @@ void diff_flush(int diff_output_style) generate_patch = 1; break; } - for (i = 0; i < q->nr; i++) - diff_flush_one(q->queue[i]); + for (i = 0; i < q->nr; i++) { + char msg_[PATH_MAX*2+200], *msg = NULL; + struct diff_filepair *p = q->queue[i]; + if (strcmp(p->one->path, p->two->path)) { + /* This is rename or copy. Which one is it? */ + if (diff_needs_to_stay(q, i+1, p->one)) { + sprintf(msg_, + "similarity index %d%%\n" + "copy from %s\n" + "copy to %s\n", + (int)(0.5 + p->score * 100/MAX_SCORE), + p->one->path, p->two->path); + } + else + sprintf(msg_, + "similarity index %d%%\n" + "rename old %s\n" + "rename new %s\n", + (int)(0.5 + p->score * 100/MAX_SCORE), + p->one->path, p->two->path); + msg = msg_; + } + diff_flush_one(p, msg); + } + for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; diff_free_filespec_data(p->one); diff_free_filespec_data(p->two); - free(p->xfrm_msg); free(p); } free(q->queue); @@ -674,10 +725,10 @@ void diff_addremove(int addremove, unsigned mode, * entries to the diff-core. They will be prefixed * with something like '=' or '*' (I haven't decided * which but should not make any difference). - * Feeding the same new and old to diff_change() should - * also have the same effect. diff_flush() should - * filter uninteresting ones out at the final output - * stage. + * Feeding the same new and old to diff_change() + * also has the same effect. diffcore_prune() should + * be used to filter uninteresting ones out before the + * final output happens. */ if (reverse_diff) addremove = (addremove == '+' ? '-' : |