diff options
author | Kirill Smelkov <kirr@mns.spb.ru> | 2014-02-24 20:21:51 +0400 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2014-04-07 14:41:49 -0700 |
commit | 7195fbfaf5a539b8e8358097e02b63991e78a565 (patch) | |
tree | 5fb4ddafccf80986e24b40c29ebafefecf643d51 /combine-diff.c | |
parent | 72441af7c4e3bde33cdf7edafcf09c227d5d5296 (diff) | |
download | git-7195fbfaf5a539b8e8358097e02b63991e78a565.tar.gz |
combine-diff: speed it up, by using multiparent diff tree-walker directly
As was recently shown in "combine-diff: optimize
combine_diff_path sets intersection", combine-diff runs very slowly. In
that commit we optimized paths sets intersection, but that accounted
only for ~ 25% of the slowness, and as my tracing showed, for linux.git
v3.10..v3.11, for merges a lot of time is spent computing
diff(commit,commit^2) just to only then intersect that huge diff to
almost small set of files from diff(commit,commit^1).
In previous commit, we described the problem in more details, and
reworked the diff tree-walker to be general one - i.e. to work in
multiple parent case too. Now is the time to take advantage of it for
finding paths for combine diff.
The implementation is straightforward - if we know, we can get generated
diff paths directly, and at present that means no diff filtering or
rename/copy detection was requested(*), we can call multiparent tree-walker
directly and get ready paths.
(*) because e.g. at present, all diffcore transformations work on
diff_filepair queues, but in the future, that limitation can be
lifted, if filters would operate directly on combine_diff_paths.
Timings for `git log --raw --no-abbrev --no-renames` without `-c` ("git log")
and with `-c` ("git log -c") and with `-c --merges` ("git log -c --merges")
before and after the patch are as follows:
linux.git v3.10..v3.11
log log -c log -c --merges
before 1.9s 16.4s 15.2s
after 1.9s 2.4s 1.1s
The result stayed the same.
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'combine-diff.c')
-rw-r--r-- | combine-diff.c | 88 |
1 files changed, 83 insertions, 5 deletions
diff --git a/combine-diff.c b/combine-diff.c index 1732dfd110..12764fb733 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1303,7 +1303,7 @@ static const char *path_path(void *obj) /* find set of paths that every parent touches */ -static struct combine_diff_path *find_paths(const unsigned char *sha1, +static struct combine_diff_path *find_paths_generic(const unsigned char *sha1, const struct sha1_array *parents, struct diff_options *opt) { struct combine_diff_path *paths = NULL; @@ -1316,6 +1316,7 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1, /* tell diff_tree to emit paths in sorted (=tree) order */ opt->orderfile = NULL; + /* D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (wrt paths) */ for (i = 0; i < num_parent; i++) { /* * show stat against the first parent even when doing @@ -1346,6 +1347,35 @@ static struct combine_diff_path *find_paths(const unsigned char *sha1, } +/* + * find set of paths that everybody touches, assuming diff is run without + * rename/copy detection, etc, comparing all trees simultaneously (= faster). + */ +static struct combine_diff_path *find_paths_multitree( + const unsigned char *sha1, const struct sha1_array *parents, + struct diff_options *opt) +{ + int i, nparent = parents->nr; + const unsigned char **parents_sha1; + struct combine_diff_path paths_head; + struct strbuf base; + + parents_sha1 = xmalloc(nparent * sizeof(parents_sha1[0])); + for (i = 0; i < nparent; i++) + parents_sha1[i] = parents->sha1[i]; + + /* fake list head, so worker can assume it is non-NULL */ + paths_head.next = NULL; + + strbuf_init(&base, PATH_MAX); + diff_tree_paths(&paths_head, sha1, parents_sha1, nparent, &base, opt); + + strbuf_release(&base); + free(parents_sha1); + return paths_head.next; +} + + void diff_tree_combined(const unsigned char *sha1, const struct sha1_array *parents, int dense, @@ -1355,6 +1385,7 @@ void diff_tree_combined(const unsigned char *sha1, struct diff_options diffopts; struct combine_diff_path *p, *paths; int i, num_paths, needsep, show_log_first, num_parent = parents->nr; + int need_generic_pathscan; /* nothing to do, if no parents */ if (!num_parent) @@ -1377,11 +1408,58 @@ void diff_tree_combined(const unsigned char *sha1, /* find set of paths that everybody touches * - * NOTE find_paths() also handles --stat, as it computes - * diff(sha1,parent_i) for all i to do the job, specifically - * for parent0. + * NOTE + * + * Diffcore transformations are bound to diff_filespec and logic + * comparing two entries - i.e. they do not apply directly to combine + * diff. + * + * If some of such transformations is requested - we launch generic + * path scanning, which works significantly slower compared to + * simultaneous all-trees-in-one-go scan in find_paths_multitree(). + * + * TODO some of the filters could be ported to work on + * combine_diff_paths - i.e. all functionality that skips paths, so in + * theory, we could end up having only multitree path scanning. + * + * NOTE please keep this semantically in sync with diffcore_std() */ - paths = find_paths(sha1, parents, &diffopts); + need_generic_pathscan = opt->skip_stat_unmatch || + DIFF_OPT_TST(opt, FOLLOW_RENAMES) || + opt->break_opt != -1 || + opt->detect_rename || + opt->pickaxe || + opt->filter; + + + if (need_generic_pathscan) { + /* + * NOTE generic case also handles --stat, as it computes + * diff(sha1,parent_i) for all i to do the job, specifically + * for parent0. + */ + paths = find_paths_generic(sha1, parents, &diffopts); + } + else { + int stat_opt; + paths = find_paths_multitree(sha1, parents, &diffopts); + + /* + * show stat against the first parent even + * when doing combined diff. + */ + stat_opt = (opt->output_format & + (DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT)); + if (stat_opt) { + diffopts.output_format = stat_opt; + + diff_tree_sha1(parents->sha1[0], sha1, "", &diffopts); + diffcore_std(&diffopts); + if (opt->orderfile) + diffcore_order(opt->orderfile); + diff_flush(&diffopts); + } + } /* find out number of surviving paths */ for (num_paths = 0, p = paths; p; p = p->next) |