From d0af663e42abfcd5be6f7c3db21a29e521aa4ca2 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Thu, 16 May 2013 18:32:34 +0300 Subject: revision.c: Make --full-history consider more merges History simplification previously always treated merges as TREESAME if they were TREESAME to any parent. While this was consistent with the default behaviour, this could be extremely unhelpful when searching detailed history, and could not be overridden. For example, if a merge had ignored a change, as if by "-s ours", then: git log -m -p --full-history -Schange file would successfully locate "change"'s addition but would not locate the merge that resolved against it. Futher, simplify_merges could drop the actual parent that a commit was TREESAME to, leaving it as a normal commit marked TREESAME that isn't actually TREESAME to its remaining parent. Now redefine a commit's TREESAME flag to be true only if a commit is TREESAME to _all_ of its parents. This doesn't affect either the default simplify_history behaviour (because partially TREESAME merges are turned into normal commits), or full-history with parent rewriting (because all merges are output). But it does affect other modes. The clearest difference is that --full-history will show more merges - sufficient to ensure that -m -p --full-history log searches can really explain every change to the file, including those changes' ultimate fate in merges. Also modify simplify_merges to recalculate TREESAME after removing a parent. This is achieved by storing per-parent TREESAME flags on the initial scan, so the combined flag can be easily recomputed. This fixes some t6111 failures, but creates a couple of new ones - we are now showing some merges that don't need to be shown. Signed-off-by: Kevin Bracey Signed-off-by: Junio C Hamano --- revision.c | 241 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 211 insertions(+), 30 deletions(-) (limited to 'revision.c') diff --git a/revision.c b/revision.c index 7f7a8ab7cb..64b86ae91e 100644 --- a/revision.c +++ b/revision.c @@ -429,10 +429,100 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit) return retval >= 0 && (tree_difference == REV_TREE_SAME); } +struct treesame_state { + unsigned int nparents; + unsigned char treesame[FLEX_ARRAY]; +}; + +static struct treesame_state *initialise_treesame(struct rev_info *revs, struct commit *commit) +{ + unsigned n = commit_list_count(commit->parents); + struct treesame_state *st = xcalloc(1, sizeof(*st) + n); + st->nparents = n; + add_decoration(&revs->treesame, &commit->object, st); + return st; +} + +/* + * Must be called immediately after removing the nth_parent from a commit's + * parent list, if we are maintaining the per-parent treesame[] decoration. + * This does not recalculate the master TREESAME flag - update_treesame() + * should be called to update it after a sequence of treesame[] modifications + * that may have affected it. + */ +static int compact_treesame(struct rev_info *revs, struct commit *commit, unsigned nth_parent) +{ + struct treesame_state *st; + int old_same; + + if (!commit->parents) { + /* + * Have just removed the only parent from a non-merge. + * Different handling, as we lack decoration. + */ + if (nth_parent != 0) + die("compact_treesame %u", nth_parent); + old_same = !!(commit->object.flags & TREESAME); + if (rev_same_tree_as_empty(revs, commit)) + commit->object.flags |= TREESAME; + else + commit->object.flags &= ~TREESAME; + return old_same; + } + + st = lookup_decoration(&revs->treesame, &commit->object); + if (!st || nth_parent >= st->nparents) + die("compact_treesame %u", nth_parent); + + old_same = st->treesame[nth_parent]; + memmove(st->treesame + nth_parent, + st->treesame + nth_parent + 1, + st->nparents - nth_parent - 1); + + /* + * If we've just become a non-merge commit, update TREESAME + * immediately, and remove the no-longer-needed decoration. + * If still a merge, defer update until update_treesame(). + */ + if (--st->nparents == 1) { + if (commit->parents->next) + die("compact_treesame parents mismatch"); + if (st->treesame[0] && revs->dense) + commit->object.flags |= TREESAME; + else + commit->object.flags &= ~TREESAME; + free(add_decoration(&revs->treesame, &commit->object, NULL)); + } + + return old_same; +} + +static unsigned update_treesame(struct rev_info *revs, struct commit *commit) +{ + if (commit->parents && commit->parents->next) { + unsigned n; + struct treesame_state *st; + + st = lookup_decoration(&revs->treesame, &commit->object); + if (!st) + die("update_treesame %s", sha1_to_hex(commit->object.sha1)); + commit->object.flags |= TREESAME; + for (n = 0; n < st->nparents; n++) { + if (!st->treesame[n]) { + commit->object.flags &= ~TREESAME; + break; + } + } + } + + return commit->object.flags & TREESAME; +} + static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) { struct commit_list **pp, *parent; - int tree_changed = 0, tree_same = 0, nth_parent = 0; + struct treesame_state *ts = NULL; + int tree_changed = 0, nth_parent; /* * If we don't do pruning, everything is interesting @@ -456,25 +546,43 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) if (!revs->dense && !commit->parents->next) return; - pp = &commit->parents; - while ((parent = *pp) != NULL) { + for (pp = &commit->parents, nth_parent = 0; + (parent = *pp) != NULL; + pp = &parent->next, nth_parent++) { struct commit *p = parent->item; - /* - * Do not compare with later parents when we care only about - * the first parent chain, in order to avoid derailing the - * traversal to follow a side branch that brought everything - * in the path we are limited to by the pathspec. - */ - if (revs->first_parent_only && nth_parent++) - break; + if (nth_parent == 1) { + /* + * This our second loop iteration - so we now know + * we're dealing with a merge. + * + * Do not compare with later parents when we care only about + * the first parent chain, in order to avoid derailing the + * traversal to follow a side branch that brought everything + * in the path we are limited to by the pathspec. + */ + if (revs->first_parent_only) + break; + /* + * If this will remain a potentially-simplifiable + * merge, remember per-parent treesame if needed. + * Initialise the array with the comparison from our + * first iteration. + */ + if (revs->treesame.name && + !revs->simplify_history && + !(commit->object.flags & UNINTERESTING)) { + ts = initialise_treesame(revs, commit); + if (!tree_changed) + ts->treesame[0] = 1; + } + } if (parse_commit(p) < 0) die("cannot simplify commit %s (because of %s)", sha1_to_hex(commit->object.sha1), sha1_to_hex(p->object.sha1)); switch (rev_compare_tree(revs, p, commit)) { case REV_TREE_SAME: - tree_same = 1; if (!revs->simplify_history || (p->object.flags & UNINTERESTING)) { /* Even if a merge with an uninteresting * side branch brought the entire change @@ -482,7 +590,8 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) * to lose the other branches of this * merge, so we just keep going. */ - pp = &parent->next; + if (ts) + ts->treesame[nth_parent] = 1; continue; } parent->next = NULL; @@ -511,12 +620,11 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) case REV_TREE_OLD: case REV_TREE_DIFFERENT: tree_changed = 1; - pp = &parent->next; continue; } die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1)); } - if (tree_changed && !tree_same) + if (tree_changed) return; commit->object.flags |= TREESAME; } @@ -1947,28 +2055,32 @@ static void add_child(struct rev_info *revs, struct commit *parent, struct commi l->next = add_decoration(&revs->children, &parent->object, l); } -static int remove_duplicate_parents(struct commit *commit) +static int remove_duplicate_parents(struct rev_info *revs, struct commit *commit) { + struct treesame_state *ts = lookup_decoration(&revs->treesame, &commit->object); struct commit_list **pp, *p; int surviving_parents; /* Examine existing parents while marking ones we have seen... */ pp = &commit->parents; + surviving_parents = 0; while ((p = *pp) != NULL) { struct commit *parent = p->item; if (parent->object.flags & TMP_MARK) { *pp = p->next; + if (ts) + compact_treesame(revs, commit, surviving_parents); continue; } parent->object.flags |= TMP_MARK; + surviving_parents++; pp = &p->next; } - /* count them while clearing the temporary mark */ - surviving_parents = 0; + /* clear the temporary mark */ for (p = commit->parents; p; p = p->next) { p->item->object.flags &= ~TMP_MARK; - surviving_parents++; } + /* no update_treesame() - removing duplicates can't affect TREESAME */ return surviving_parents; } @@ -1988,6 +2100,70 @@ static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs, return st; } +static int mark_redundant_parents(struct rev_info *revs, struct commit *commit) +{ + struct commit_list *h = reduce_heads(commit->parents); + int i = 0, marked = 0; + struct commit_list *po, *pn; + + /* Want these for sanity-checking only */ + int orig_cnt = commit_list_count(commit->parents); + int cnt = commit_list_count(h); + + /* + * Not ready to remove items yet, just mark them for now, based + * on the output of reduce_heads(). reduce_heads outputs the reduced + * set in its original order, so this isn't too hard. + */ + po = commit->parents; + pn = h; + while (po) { + if (pn && po->item == pn->item) { + pn = pn->next; + i++; + } else { + po->item->object.flags |= TMP_MARK; + marked++; + } + po=po->next; + } + + if (i != cnt || cnt+marked != orig_cnt) + die("mark_redundant_parents %d %d %d %d", orig_cnt, cnt, i, marked); + + free_commit_list(h); + + return marked; +} + +static int remove_marked_parents(struct rev_info *revs, struct commit *commit) +{ + struct commit_list **pp, *p; + int nth_parent, removed = 0; + + pp = &commit->parents; + nth_parent = 0; + while ((p = *pp) != NULL) { + struct commit *parent = p->item; + if (parent->object.flags & TMP_MARK) { + parent->object.flags &= ~TMP_MARK; + *pp = p->next; + free(p); + removed++; + compact_treesame(revs, commit, nth_parent); + continue; + } + pp = &p->next; + nth_parent++; + } + + /* Removing parents can only increase TREESAMEness */ + if (removed && !(commit->object.flags & TREESAME)) + update_treesame(revs, commit); + + return nth_parent; +} + static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail) { struct commit_list *p; @@ -2032,7 +2208,9 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c } /* - * Rewrite our list of parents. + * Rewrite our list of parents. Note that this cannot + * affect our TREESAME flags in any way - a commit is + * always TREESAME to its simplification. */ for (p = commit->parents; p; p = p->next) { pst = locate_simplify_state(revs, p->item); @@ -2044,31 +2222,30 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c if (revs->first_parent_only) cnt = 1; else - cnt = remove_duplicate_parents(commit); + cnt = remove_duplicate_parents(revs, commit); /* * It is possible that we are a merge and one side branch * does not have any commit that touches the given paths; - * in such a case, the immediate parents will be rewritten - * to different commits. + * in such a case, the immediate parent from that branch + * will be rewritten to be the merge base. * * o----X X: the commit we are looking at; * / / o: a commit that touches the paths; * ---o----' * - * Further reduce the parents by removing redundant parents. + * Detect and simplify this case. */ if (1 < cnt) { - struct commit_list *h = reduce_heads(commit->parents); - cnt = commit_list_count(h); - free_commit_list(commit->parents); - commit->parents = h; + int marked = mark_redundant_parents(revs, commit); + if (marked) + cnt = remove_marked_parents(revs, commit); } /* * A commit simplifies to itself if it is a root, if it is * UNINTERESTING, if it touches the given paths, or if it is a - * merge and its parents simplifies to more than one commits + * merge and its parents simplify to more than one commit * (the first two cases are already handled at the beginning of * this function). * @@ -2176,6 +2353,10 @@ int prepare_revision_walk(struct rev_info *revs) if (!revs->leak_pending) free(list); + /* Signal whether we need per-parent treesame decoration */ + if (revs->simplify_merges) + revs->treesame.name = "treesame"; + if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED) commit_list_sort_by_date(&revs->commits); if (revs->no_walk) @@ -2235,7 +2416,7 @@ static int rewrite_parents(struct rev_info *revs, struct commit *commit) } pp = &parent->next; } - remove_duplicate_parents(commit); + remove_duplicate_parents(revs, commit); return 0; } -- cgit v1.2.1 From 9c129eab99f16d7bb174b35573e68ae6a5d02efe Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Thu, 16 May 2013 18:32:36 +0300 Subject: simplify-merges: never remove all TREESAME parents When simplifying an odd merge, such as one that used "-s ours", we may find ourselves TREESAME to apparently redundant parents. Prevent simplify_merges() from removing every TREESAME parent; if this would happen reinstate the first TREESAME parent - the one that the default log would have followed. This avoids producing a totally disjoint history from the default log when the default log is a better explanation of the end result, and aids visualisation of odd merges. Signed-off-by: Kevin Bracey Signed-off-by: Junio C Hamano --- revision.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'revision.c') diff --git a/revision.c b/revision.c index 64b86ae91e..62f399c22f 100644 --- a/revision.c +++ b/revision.c @@ -2136,6 +2136,73 @@ static int mark_redundant_parents(struct rev_info *revs, struct commit *commit) return marked; } +/* + * Awkward naming - this means one parent we are TREESAME to. + * cf mark_treesame_root_parents: root parents that are TREESAME (to an + * empty tree). Better name suggestions? + */ +static int leave_one_treesame_to_parent(struct rev_info *revs, struct commit *commit) +{ + struct treesame_state *ts = lookup_decoration(&revs->treesame, &commit->object); + struct commit *unmarked = NULL, *marked = NULL; + struct commit_list *p; + unsigned n; + + for (p = commit->parents, n = 0; p; p = p->next, n++) { + if (ts->treesame[n]) { + if (p->item->object.flags & TMP_MARK) { + if (!marked) + marked = p->item; + } else { + if (!unmarked) { + unmarked = p->item; + break; + } + } + } + } + + /* + * If we are TREESAME to a marked-for-deletion parent, but not to any + * unmarked parents, unmark the first TREESAME parent. This is the + * parent that the default simplify_history==1 scan would have followed, + * and it doesn't make sense to omit that path when asking for a + * simplified full history. Retaining it improves the chances of + * understanding odd missed merges that took an old version of a file. + * + * Example: + * + * I--------*X A modified the file, but mainline merge X used + * \ / "-s ours", so took the version from I. X is + * `-*A--' TREESAME to I and !TREESAME to A. + * + * Default log from X would produce "I". Without this check, + * --full-history --simplify-merges would produce "I-A-X", showing + * the merge commit X and that it changed A, but not making clear that + * it had just taken the I version. With this check, the topology above + * is retained. + * + * Note that it is possible that the simplification chooses a different + * TREESAME parent from the default, in which case this test doesn't + * activate, and we _do_ drop the default parent. Example: + * + * I------X A modified the file, but it was reverted in B, + * \ / meaning mainline merge X is TREESAME to both + * *A-*B parents. + * + * Default log would produce "I" by following the first parent; + * --full-history --simplify-merges will produce "I-A-B". But this is a + * reasonable result - it presents a logical full history leading from + * I to X, and X is not an important merge. + */ + if (!unmarked && marked) { + marked->object.flags &= ~TMP_MARK; + return 1; + } + + return 0; +} + static int remove_marked_parents(struct rev_info *revs, struct commit *commit) { struct commit_list **pp, *p; @@ -2238,6 +2305,8 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c */ if (1 < cnt) { int marked = mark_redundant_parents(revs, commit); + if (marked) + marked -= leave_one_treesame_to_parent(revs, commit); if (marked) cnt = remove_marked_parents(revs, commit); } -- cgit v1.2.1 From 143f1eafdb0d326b1a8e3d15bf59a497b9440bf1 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Thu, 16 May 2013 18:32:37 +0300 Subject: simplify-merges: drop merge from irrelevant side branch Reimplement commit 4b7f53da on top of the new simplify-merges infrastructure, tightening the condition to only consider root parents; the original version incorrectly dropped parents that were TREESAME to anything. Original log message follows. The merge simplification rule stated in 6546b59 (revision traversal: show full history with merge simplification, 2008-07-31) still treated merge commits too specially. Namely, in a history with this shape: ---o---o---M / x---x---x where three 'x' were on a history completely unrelated to the main history 'o' and do not touch any of the paths we are following, we still said that after simplifying all of the parents of M, 'x' (which is the leftmost 'x' that rightmost 'x simplifies down to) and 'o' (which would be the last commit on the main history that touches the paths we are following) are independent from each other, and both need to be kept. That is incorrect; when the side branch 'x' never touches the paths, it should be removed to allow M to simplify down to the last commit on the main history that touches the paths. Suggested-by: Junio C Hamano Signed-off-by: Kevin Bracey Signed-off-by: Junio C Hamano --- revision.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'revision.c') diff --git a/revision.c b/revision.c index 62f399c22f..4f7446c359 100644 --- a/revision.c +++ b/revision.c @@ -2136,6 +2136,22 @@ static int mark_redundant_parents(struct rev_info *revs, struct commit *commit) return marked; } +static int mark_treesame_root_parents(struct rev_info *revs, struct commit *commit) +{ + struct commit_list *p; + int marked = 0; + + for (p = commit->parents; p; p = p->next) { + struct commit *parent = p->item; + if (!parent->parents && (parent->object.flags & TREESAME)) { + parent->object.flags |= TMP_MARK; + marked++; + } + } + + return marked; +} + /* * Awkward naming - this means one parent we are TREESAME to. * cf mark_treesame_root_parents: root parents that are TREESAME (to an @@ -2301,10 +2317,18 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c * / / o: a commit that touches the paths; * ---o----' * - * Detect and simplify this case. + * Further, a merge of an independent branch that doesn't + * touch the path will reduce to a treesame root parent: + * + * ----o----X X: the commit we are looking at; + * / o: a commit that touches the paths; + * r r: a root commit not touching the paths + * + * Detect and simplify both cases. */ if (1 < cnt) { int marked = mark_redundant_parents(revs, commit); + marked += mark_treesame_root_parents(revs, commit); if (marked) marked -= leave_one_treesame_to_parent(revs, commit); if (marked) -- cgit v1.2.1 From 7f34a46ff57121910c9ad697c2809a2d01ed1673 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Thu, 16 May 2013 18:32:38 +0300 Subject: revision.c: add BOTTOM flag for commits When performing edge-based operations on the revision graph, it can be useful to be able to identify the INTERESTING graph's connection(s) to the bottom commit(s) specified by the user. Conceptually when the user specifies "A..B" (== B ^A), they are asking for the history from A to B. The first connection from A onto the INTERESTING graph is part of that history, and should be considered. If we consider only INTERESTING nodes and their connections, then we're really only considering the history from A's immediate descendants to B. This patch does not change behaviour, but adds a new BOTTOM flag to indicate the bottom commits specified by the user, ready to be used by following patches. We immediately use the BOTTOM flag to return collect_bottom_commits() to its original approach of examining the pending commit list rather than the command line. This will ensure alignment of the definition of "bottom" with future patches. Signed-off-by: Kevin Bracey Signed-off-by: Junio C Hamano --- revision.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'revision.c') diff --git a/revision.c b/revision.c index 4f7446c359..6607dabcc1 100644 --- a/revision.c +++ b/revision.c @@ -909,16 +909,12 @@ static void limit_to_ancestry(struct commit_list *bottom, struct commit_list *li * to filter the result of "A..B" further to the ones that can actually * reach A. */ -static struct commit_list *collect_bottom_commits(struct rev_info *revs) +static struct commit_list *collect_bottom_commits(struct commit_list *list) { - struct commit_list *bottom = NULL; - int i; - for (i = 0; i < revs->cmdline.nr; i++) { - struct rev_cmdline_entry *elem = &revs->cmdline.rev[i]; - if ((elem->flags & UNINTERESTING) && - elem->item->type == OBJ_COMMIT) - commit_list_insert((struct commit *)elem->item, &bottom); - } + struct commit_list *elem, *bottom = NULL; + for (elem = list; elem; elem = elem->next) + if (elem->item->object.flags & BOTTOM) + commit_list_insert(elem->item, &bottom); return bottom; } @@ -949,7 +945,7 @@ static int limit_list(struct rev_info *revs) struct commit_list *bottom = NULL; if (revs->ancestry_path) { - bottom = collect_bottom_commits(revs); + bottom = collect_bottom_commits(list); if (!bottom) die("--ancestry-path given but there are no bottom commits"); } @@ -1121,7 +1117,7 @@ static int add_parents_only(struct rev_info *revs, const char *arg_, int flags) const char *arg = arg_; if (*arg == '^') { - flags ^= UNINTERESTING; + flags ^= UNINTERESTING | BOTTOM; arg++; } if (get_sha1_committish(arg, sha1)) @@ -1213,8 +1209,8 @@ static void prepare_show_merge(struct rev_info *revs) add_pending_object(revs, &head->object, "HEAD"); add_pending_object(revs, &other->object, "MERGE_HEAD"); bases = get_merge_bases(head, other, 1); - add_rev_cmdline_list(revs, bases, REV_CMD_MERGE_BASE, UNINTERESTING); - add_pending_commit_list(revs, bases, UNINTERESTING); + add_rev_cmdline_list(revs, bases, REV_CMD_MERGE_BASE, UNINTERESTING | BOTTOM); + add_pending_commit_list(revs, bases, UNINTERESTING | BOTTOM); free_commit_list(bases); head->object.flags |= SYMMETRIC_LEFT; @@ -1250,13 +1246,15 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi int cant_be_filename = revarg_opt & REVARG_CANNOT_BE_FILENAME; unsigned get_sha1_flags = 0; + flags = flags & UNINTERESTING ? flags | BOTTOM : flags & ~BOTTOM; + dotdot = strstr(arg, ".."); if (dotdot) { unsigned char from_sha1[20]; const char *next = dotdot + 2; const char *this = arg; int symmetric = *next == '.'; - unsigned int flags_exclude = flags ^ UNINTERESTING; + unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM); static const char head_by_default[] = "HEAD"; unsigned int a_flags; @@ -1332,13 +1330,13 @@ int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsi dotdot = strstr(arg, "^!"); if (dotdot && !dotdot[2]) { *dotdot = 0; - if (!add_parents_only(revs, arg, flags ^ UNINTERESTING)) + if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM))) *dotdot = '^'; } local_flags = 0; if (*arg == '^') { - local_flags = UNINTERESTING; + local_flags = UNINTERESTING | BOTTOM; arg++; } @@ -1815,7 +1813,7 @@ static int handle_revision_pseudo_opt(const char *submodule, handle_refs(submodule, revs, *flags, for_each_branch_ref_submodule); } else if (!strcmp(arg, "--bisect")) { handle_refs(submodule, revs, *flags, for_each_bad_bisect_ref); - handle_refs(submodule, revs, *flags ^ UNINTERESTING, for_each_good_bisect_ref); + handle_refs(submodule, revs, *flags ^ (UNINTERESTING | BOTTOM), for_each_good_bisect_ref); revs->bisect = 1; } else if (!strcmp(arg, "--tags")) { handle_refs(submodule, revs, *flags, for_each_tag_ref_submodule); @@ -1841,7 +1839,7 @@ static int handle_revision_pseudo_opt(const char *submodule, } else if (!strcmp(arg, "--reflog")) { handle_reflog(revs, *flags); } else if (!strcmp(arg, "--not")) { - *flags ^= UNINTERESTING; + *flags ^= UNINTERESTING | BOTTOM; } else if (!strcmp(arg, "--no-walk")) { revs->no_walk = REVISION_WALK_NO_WALK_SORTED; } else if (!prefixcmp(arg, "--no-walk=")) { -- cgit v1.2.1 From 4d826608e9610851d29440ca290e54b701921608 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Thu, 16 May 2013 18:32:39 +0300 Subject: revision.c: discount side branches when computing TREESAME Use the BOTTOM flag to define relevance for pruning. Relevant commits are those that are !UNINTERESTING or BOTTOM, and this allows us to identify irrelevant side branches (UNINTERESTING && !BOTTOM). If a merge has relevant parents, and it is TREESAME to them, then do not let irrelevant parents cause the merge to be treated as !TREESAME. When considering simplification, don't always include all merges - merges with exactly one relevant parent can be simplified, if TREESAME according to the above rule. These two changes greatly increase simplification in limited, pruned revision lists. Signed-off-by: Kevin Bracey Signed-off-by: Junio C Hamano --- revision.c | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 150 insertions(+), 21 deletions(-) (limited to 'revision.c') diff --git a/revision.c b/revision.c index 6607dabcc1..1c750705d8 100644 --- a/revision.c +++ b/revision.c @@ -332,6 +332,80 @@ static int everybody_uninteresting(struct commit_list *orig) return 1; } +/* + * A definition of "relevant" commit that we can use to simplify limited graphs + * by eliminating side branches. + * + * A "relevant" commit is one that is !UNINTERESTING (ie we are including it + * in our list), or that is a specified BOTTOM commit. Then after computing + * a limited list, during processing we can generally ignore boundary merges + * coming from outside the graph, (ie from irrelevant parents), and treat + * those merges as if they were single-parent. TREESAME is defined to consider + * only relevant parents, if any. If we are TREESAME to our on-graph parents, + * we don't care if we were !TREESAME to non-graph parents. + * + * Treating bottom commits as relevant ensures that a limited graph's + * connection to the actual bottom commit is not viewed as a side branch, but + * treated as part of the graph. For example: + * + * ....Z...A---X---o---o---B + * . / + * W---Y + * + * When computing "A..B", the A-X connection is at least as important as + * Y-X, despite A being flagged UNINTERESTING. + * + * And when computing --ancestry-path "A..B", the A-X connection is more + * important than Y-X, despite both A and Y being flagged UNINTERESTING. + */ +static inline int relevant_commit(struct commit *commit) +{ + return (commit->object.flags & (UNINTERESTING | BOTTOM)) != UNINTERESTING; +} + +/* + * Return a single relevant commit from a parent list. If we are a TREESAME + * commit, and this selects one of our parents, then we can safely simplify to + * that parent. + */ +static struct commit *one_relevant_parent(const struct rev_info *revs, + struct commit_list *orig) +{ + struct commit_list *list = orig; + struct commit *relevant = NULL; + + if (!orig) + return NULL; + + /* + * For 1-parent commits, or if first-parent-only, then return that + * first parent (even if not "relevant" by the above definition). + * TREESAME will have been set purely on that parent. + */ + if (revs->first_parent_only || !orig->next) + return orig->item; + + /* + * For multi-parent commits, identify a sole relevant parent, if any. + * If we have only one relevant parent, then TREESAME will be set purely + * with regard to that parent, and we can simplify accordingly. + * + * If we have more than one relevant parent, or no relevant parents + * (and multiple irrelevant ones), then we can't select a parent here + * and return NULL. + */ + while (list) { + struct commit *commit = list->item; + list = list->next; + if (relevant_commit(commit)) { + if (relevant) + return NULL; + relevant = commit; + } + } + return relevant; +} + /* * The goal is to get REV_TREE_NEW as the result only if the * diff consists of all '+' (and no other changes), REV_TREE_OLD @@ -502,27 +576,52 @@ static unsigned update_treesame(struct rev_info *revs, struct commit *commit) if (commit->parents && commit->parents->next) { unsigned n; struct treesame_state *st; + struct commit_list *p; + unsigned relevant_parents; + unsigned relevant_change, irrelevant_change; st = lookup_decoration(&revs->treesame, &commit->object); if (!st) die("update_treesame %s", sha1_to_hex(commit->object.sha1)); - commit->object.flags |= TREESAME; - for (n = 0; n < st->nparents; n++) { - if (!st->treesame[n]) { - commit->object.flags &= ~TREESAME; - break; - } + relevant_parents = 0; + relevant_change = irrelevant_change = 0; + for (p = commit->parents, n = 0; p; n++, p = p->next) { + if (relevant_commit(p->item)) { + relevant_change |= !st->treesame[n]; + relevant_parents++; + } else + irrelevant_change |= !st->treesame[n]; } + if (relevant_parents ? relevant_change : irrelevant_change) + commit->object.flags &= ~TREESAME; + else + commit->object.flags |= TREESAME; } return commit->object.flags & TREESAME; } +static inline int limiting_can_increase_treesame(const struct rev_info *revs) +{ + /* + * TREESAME is irrelevant unless prune && dense; + * if simplify_history is set, we can't have a mixture of TREESAME and + * !TREESAME INTERESTING parents (and we don't have treesame[] + * decoration anyway); + * if first_parent_only is set, then the TREESAME flag is locked + * against the first parent (and again we lack treesame[] decoration). + */ + return revs->prune && revs->dense && + !revs->simplify_history && + !revs->first_parent_only; +} + static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) { struct commit_list **pp, *parent; struct treesame_state *ts = NULL; - int tree_changed = 0, nth_parent; + int relevant_change = 0, irrelevant_change = 0; + int relevant_parents, nth_parent; /* * If we don't do pruning, everything is interesting @@ -546,10 +645,12 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) if (!revs->dense && !commit->parents->next) return; - for (pp = &commit->parents, nth_parent = 0; + for (pp = &commit->parents, nth_parent = 0, relevant_parents = 0; (parent = *pp) != NULL; pp = &parent->next, nth_parent++) { struct commit *p = parent->item; + if (relevant_commit(p)) + relevant_parents++; if (nth_parent == 1) { /* @@ -573,7 +674,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) !revs->simplify_history && !(commit->object.flags & UNINTERESTING)) { ts = initialise_treesame(revs, commit); - if (!tree_changed) + if (!(irrelevant_change || relevant_change)) ts->treesame[0] = 1; } } @@ -619,14 +720,27 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) /* fallthrough */ case REV_TREE_OLD: case REV_TREE_DIFFERENT: - tree_changed = 1; + if (relevant_commit(p)) + relevant_change = 1; + else + irrelevant_change = 1; continue; } die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1)); } - if (tree_changed) - return; - commit->object.flags |= TREESAME; + + /* + * TREESAME is straightforward for single-parent commits. For merge + * commits, it is most useful to define it so that "irrelevant" + * parents cannot make us !TREESAME - if we have any relevant + * parents, then we only consider TREESAMEness with respect to them, + * allowing irrelevant merges from uninteresting branches to be + * simplified away. Only if we have only irrelevant parents do we + * base TREESAME on them. Note that this logic is replicated in + * update_treesame, which should be kept in sync. + */ + if (relevant_parents ? !relevant_change : !irrelevant_change) + commit->object.flags |= TREESAME; } static void commit_list_insert_by_date_cached(struct commit *p, struct commit_list **head, @@ -998,6 +1112,18 @@ static int limit_list(struct rev_info *revs) free_commit_list(bottom); } + /* + * Check if any commits have become TREESAME by some of their parents + * becoming UNINTERESTING. + */ + if (limiting_can_increase_treesame(revs)) + for (list = newlist; list; list = list->next) { + struct commit *c = list->item; + if (c->object.flags & (UNINTERESTING | TREESAME)) + continue; + update_treesame(revs, c); + } + revs->commits = newlist; return 0; } @@ -2248,6 +2374,7 @@ static int remove_marked_parents(struct rev_info *revs, struct commit *commit) static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail) { struct commit_list *p; + struct commit *parent; struct merge_simplify_state *st, *pst; int cnt; @@ -2336,19 +2463,20 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c /* * A commit simplifies to itself if it is a root, if it is * UNINTERESTING, if it touches the given paths, or if it is a - * merge and its parents simplify to more than one commit + * merge and its parents don't simplify to one relevant commit * (the first two cases are already handled at the beginning of * this function). * - * Otherwise, it simplifies to what its sole parent simplifies to. + * Otherwise, it simplifies to what its sole relevant parent + * simplifies to. */ if (!cnt || (commit->object.flags & UNINTERESTING) || !(commit->object.flags & TREESAME) || - (1 < cnt)) + (parent = one_relevant_parent(revs, commit->parents)) == NULL) st->simplified = commit; else { - pst = locate_simplify_state(revs, commit->parents->item); + pst = locate_simplify_state(revs, parent); st->simplified = pst->simplified; } return tail; @@ -2445,7 +2573,8 @@ int prepare_revision_walk(struct rev_info *revs) free(list); /* Signal whether we need per-parent treesame decoration */ - if (revs->simplify_merges) + if (revs->simplify_merges || + (revs->limited && limiting_can_increase_treesame(revs))) revs->treesame.name = "treesame"; if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED) @@ -2479,15 +2608,15 @@ static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp if (!revs->limited) if (add_parents_to_list(revs, p, &revs->commits, &cache) < 0) return rewrite_one_error; - if (p->parents && p->parents->next) - return rewrite_one_ok; if (p->object.flags & UNINTERESTING) return rewrite_one_ok; if (!(p->object.flags & TREESAME)) return rewrite_one_ok; if (!p->parents) return rewrite_one_noparents; - *pp = p->parents->item; + if ((p = one_relevant_parent(revs, p->parents)) == NULL) + return rewrite_one_ok; + *pp = p; } } -- cgit v1.2.1 From bf3418b08bac89902a5f6c70f8695f148df99828 Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Thu, 16 May 2013 18:32:40 +0300 Subject: revision.c: don't show all merges for --parents When using --parents or --children, get_commit_action() previously showed all merges, even if TREESAME to both parents. This was intended to tie together the topology of the rewritten parents, but it was excessive - in fact we only need to show merges that have two or more relevant parents. Merges at the boundary do not necessarily need to be shown. Signed-off-by: Kevin Bracey Signed-off-by: Junio C Hamano --- revision.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'revision.c') diff --git a/revision.c b/revision.c index 1c750705d8..edb7e1c4d1 100644 --- a/revision.c +++ b/revision.c @@ -2760,10 +2760,7 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi if (revs->min_age != -1 && (commit->date > revs->min_age)) return commit_ignore; if (revs->min_parents || (revs->max_parents >= 0)) { - int n = 0; - struct commit_list *p; - for (p = commit->parents; p; p = p->next) - n++; + int n = commit_list_count(commit->parents); if ((n < revs->min_parents) || ((revs->max_parents >= 0) && (n > revs->max_parents))) return commit_ignore; @@ -2773,12 +2770,23 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi if (revs->prune && revs->dense) { /* Commit without changes? */ if (commit->object.flags & TREESAME) { + int n; + struct commit_list *p; /* drop merges unless we want parenthood */ if (!want_ancestry(revs)) return commit_ignore; - /* non-merge - always ignore it */ - if (!commit->parents || !commit->parents->next) - return commit_ignore; + /* + * If we want ancestry, then need to keep any merges + * between relevant commits to tie together topology. + * For consistency with TREESAME and simplification + * use "relevant" here rather than just INTERESTING, + * to treat bottom commit(s) as part of the topology. + */ + for (n = 0, p = commit->parents; p; p = p->next) + if (relevant_commit(p->item)) + if (++n >= 2) + return commit_show; + return commit_ignore; } } return commit_show; -- cgit v1.2.1 From 141efdba57b1769fc60ff9a3925afbc6af398faf Mon Sep 17 00:00:00 2001 From: Kevin Bracey Date: Thu, 16 May 2013 18:32:41 +0300 Subject: revision.c: make default history consider bottom commits Previously, the default history treated bottom commits the same as any other UNINTERESTING commit, which could force it down side branches. Consider the following history: *A--*B---D--*F * marks !TREESAME parent paths \ /* `-C-' When requesting "B..F", B is UNINTERESTING but TREESAME to D. C is !UNINTERESTING. So default following would go from D into the irrelevant side branch C to A, rather than to B. Note also that if there had been an extra !UNINTERESTING commit B1 between B and D, it wouldn't have gone down C. Change the default following to test relevant_commit() instead of !UNINTERESTING, so it can proceed straight from D to B, thus finishing the traversal of that path. Signed-off-by: Kevin Bracey Signed-off-by: Junio C Hamano --- revision.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'revision.c') diff --git a/revision.c b/revision.c index edb7e1c4d1..914ac78328 100644 --- a/revision.c +++ b/revision.c @@ -684,7 +684,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) sha1_to_hex(p->object.sha1)); switch (rev_compare_tree(revs, p, commit)) { case REV_TREE_SAME: - if (!revs->simplify_history || (p->object.flags & UNINTERESTING)) { + if (!revs->simplify_history || !relevant_commit(p)) { /* Even if a merge with an uninteresting * side branch brought the entire change * we are interested in, we do not want -- cgit v1.2.1