diff options
author | Junio C Hamano <gitster@pobox.com> | 2016-08-12 09:47:39 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2016-08-12 09:47:39 -0700 |
commit | dd610aeda684e42d3933a719cbd59ffbcdfdbcaa (patch) | |
tree | 3001212a96785e66a7d82e18116badd5007bc0ef | |
parent | 3787e3c16ced0e3a614766dfbb55f8cbd70762c1 (diff) | |
parent | b3dfeebb92630c54db1e4f03dbcff0e05208c4c1 (diff) | |
download | git-dd610aeda684e42d3933a719cbd59ffbcdfdbcaa.tar.gz |
Merge branch 'kw/patch-ids-optim'
When "git rebase" tries to compare set of changes on the updated
upstream and our own branch, it computes patch-id for all of these
changes and attempts to find matches. This has been optimized by
lazily computing the full patch-id (which is expensive) to be
compared only for changes that touch the same set of paths.
* kw/patch-ids-optim:
rebase: avoid computing unnecessary patch IDs
patch-ids: add flag to create the diff patch id using header only data
patch-ids: replace the seen indicator with a commit pointer
patch-ids: stop using a hand-rolled hashmap implementation
-rw-r--r-- | builtin/log.c | 2 | ||||
-rw-r--r-- | diff.c | 16 | ||||
-rw-r--r-- | diff.h | 2 | ||||
-rw-r--r-- | patch-ids.c | 109 | ||||
-rw-r--r-- | patch-ids.h | 11 | ||||
-rw-r--r-- | revision.c | 18 | ||||
-rw-r--r-- | t/perf/p3400-rebase.sh | 36 | ||||
-rwxr-xr-x | t/t6007-rev-list-cherry-pick-file.sh | 21 |
8 files changed, 128 insertions, 87 deletions
diff --git a/builtin/log.c b/builtin/log.c index 1f116bea8c..92dc34dcb0 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1343,7 +1343,7 @@ static void prepare_bases(struct base_tree_info *bases, struct object_id *patch_id; if (commit->util) continue; - if (commit_patch_id(commit, &diffopt, sha1)) + if (commit_patch_id(commit, &diffopt, sha1, 0)) die(_("cannot get patch id")); ALLOC_GROW(bases->patch_id, bases->nr_patch_id + 1, bases->alloc_patch_id); patch_id = bases->patch_id + bases->nr_patch_id; @@ -4462,7 +4462,7 @@ static void patch_id_consume(void *priv, char *line, unsigned long len) } /* returns 0 upon success, and writes result into sha1 */ -static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1) +static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; @@ -4497,9 +4497,6 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1) diff_fill_sha1_info(p->one); diff_fill_sha1_info(p->two); - if (fill_mmfile(&mf1, p->one) < 0 || - fill_mmfile(&mf2, p->two) < 0) - return error("unable to read files to diff"); len1 = remove_space(p->one->path, strlen(p->one->path)); len2 = remove_space(p->two->path, strlen(p->two->path)); @@ -4534,6 +4531,13 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1) len2, p->two->path); git_SHA1_Update(&ctx, buffer, len1); + if (diff_header_only) + continue; + + if (fill_mmfile(&mf1, p->one) < 0 || + fill_mmfile(&mf2, p->two) < 0) + return error("unable to read files to diff"); + if (diff_filespec_is_binary(p->one) || diff_filespec_is_binary(p->two)) { git_SHA1_Update(&ctx, oid_to_hex(&p->one->oid), @@ -4556,11 +4560,11 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1) return 0; } -int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1) +int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; - int result = diff_get_patch_id(options, sha1); + int result = diff_get_patch_id(options, sha1, diff_header_only); for (i = 0; i < q->nr; i++) diff_free_filepair(q->queue[i]); @@ -342,7 +342,7 @@ extern int run_diff_files(struct rev_info *revs, unsigned int option); extern int run_diff_index(struct rev_info *revs, int cached); extern int do_diff_cache(const unsigned char *, struct diff_options *); -extern int diff_flush_patch_id(struct diff_options *, unsigned char *); +extern int diff_flush_patch_id(struct diff_options *, unsigned char *, int); extern int diff_result_code(struct diff_options *, int); diff --git a/patch-ids.c b/patch-ids.c index a4d0016664..082412aca6 100644 --- a/patch-ids.c +++ b/patch-ids.c @@ -5,7 +5,7 @@ #include "patch-ids.h" int commit_patch_id(struct commit *commit, struct diff_options *options, - unsigned char *sha1) + unsigned char *sha1, int diff_header_only) { if (commit->parents) diff_tree_sha1(commit->parents->item->object.oid.hash, @@ -13,93 +13,86 @@ int commit_patch_id(struct commit *commit, struct diff_options *options, else diff_root_tree_sha1(commit->object.oid.hash, "", options); diffcore_std(options); - return diff_flush_patch_id(options, sha1); + return diff_flush_patch_id(options, sha1, diff_header_only); } -static const unsigned char *patch_id_access(size_t index, void *table) +/* + * When we cannot load the full patch-id for both commits for whatever + * reason, the function returns -1 (i.e. return error(...)). Despite + * the "cmp" in the name of this function, the caller only cares about + * the return value being zero (a and b are equivalent) or non-zero (a + * and b are different), and returning non-zero would keep both in the + * result, even if they actually were equivalent, in order to err on + * the side of safety. The actual value being negative does not have + * any significance; only that it is non-zero matters. + */ +static int patch_id_cmp(struct patch_id *a, + struct patch_id *b, + struct diff_options *opt) { - struct patch_id **id_table = table; - return id_table[index]->patch_id; + if (is_null_sha1(a->patch_id) && + commit_patch_id(a->commit, opt, a->patch_id, 0)) + return error("Could not get patch ID for %s", + oid_to_hex(&a->commit->object.oid)); + if (is_null_sha1(b->patch_id) && + commit_patch_id(b->commit, opt, b->patch_id, 0)) + return error("Could not get patch ID for %s", + oid_to_hex(&b->commit->object.oid)); + return hashcmp(a->patch_id, b->patch_id); } -static int patch_pos(struct patch_id **table, int nr, const unsigned char *id) -{ - return sha1_pos(id, table, nr, patch_id_access); -} - -#define BUCKET_SIZE 190 /* 190 * 21 = 3990, with slop close enough to 4K */ -struct patch_id_bucket { - struct patch_id_bucket *next; - int nr; - struct patch_id bucket[BUCKET_SIZE]; -}; - int init_patch_ids(struct patch_ids *ids) { memset(ids, 0, sizeof(*ids)); diff_setup(&ids->diffopts); DIFF_OPT_SET(&ids->diffopts, RECURSIVE); diff_setup_done(&ids->diffopts); + hashmap_init(&ids->patches, (hashmap_cmp_fn)patch_id_cmp, 256); return 0; } int free_patch_ids(struct patch_ids *ids) { - struct patch_id_bucket *next, *patches; - - free(ids->table); - for (patches = ids->patches; patches; patches = next) { - next = patches->next; - free(patches); - } + hashmap_free(&ids->patches, 1); return 0; } -static struct patch_id *add_commit(struct commit *commit, - struct patch_ids *ids, - int no_add) +static int init_patch_id_entry(struct patch_id *patch, + struct commit *commit, + struct patch_ids *ids) { - struct patch_id_bucket *bucket; - struct patch_id *ent; - unsigned char sha1[20]; - int pos; + unsigned char header_only_patch_id[GIT_SHA1_RAWSZ]; - if (commit_patch_id(commit, &ids->diffopts, sha1)) - return NULL; - pos = patch_pos(ids->table, ids->nr, sha1); - if (0 <= pos) - return ids->table[pos]; - if (no_add) - return NULL; + patch->commit = commit; + if (commit_patch_id(commit, &ids->diffopts, header_only_patch_id, 1)) + return -1; - pos = -1 - pos; - - bucket = ids->patches; - if (!bucket || (BUCKET_SIZE <= bucket->nr)) { - bucket = xcalloc(1, sizeof(*bucket)); - bucket->next = ids->patches; - ids->patches = bucket; - } - ent = &bucket->bucket[bucket->nr++]; - hashcpy(ent->patch_id, sha1); - - ALLOC_GROW(ids->table, ids->nr + 1, ids->alloc); - if (pos < ids->nr) - memmove(ids->table + pos + 1, ids->table + pos, - sizeof(ent) * (ids->nr - pos)); - ids->nr++; - ids->table[pos] = ent; - return ids->table[pos]; + hashmap_entry_init(patch, sha1hash(header_only_patch_id)); + return 0; } struct patch_id *has_commit_patch_id(struct commit *commit, struct patch_ids *ids) { - return add_commit(commit, ids, 1); + struct patch_id patch; + + memset(&patch, 0, sizeof(patch)); + if (init_patch_id_entry(&patch, commit, ids)) + return NULL; + + return hashmap_get(&ids->patches, &patch, &ids->diffopts); } struct patch_id *add_commit_patch_id(struct commit *commit, struct patch_ids *ids) { - return add_commit(commit, ids, 0); + struct patch_id *key = xcalloc(1, sizeof(*key)); + + if (init_patch_id_entry(key, commit, ids)) { + free(key); + return NULL; + } + + hashmap_add(&ids->patches, key); + return key; } diff --git a/patch-ids.h b/patch-ids.h index eeb56b307f..0f34ea11ea 100644 --- a/patch-ids.h +++ b/patch-ids.h @@ -2,19 +2,18 @@ #define PATCH_IDS_H struct patch_id { - unsigned char patch_id[20]; - char seen; + struct hashmap_entry ent; + unsigned char patch_id[GIT_SHA1_RAWSZ]; + struct commit *commit; }; struct patch_ids { + struct hashmap patches; struct diff_options diffopts; - int nr, alloc; - struct patch_id **table; - struct patch_id_bucket *patches; }; int commit_patch_id(struct commit *commit, struct diff_options *options, - unsigned char *sha1); + unsigned char *sha1, int); int init_patch_ids(struct patch_ids *); int free_patch_ids(struct patch_ids *); struct patch_id *add_commit_patch_id(struct commit *, struct patch_ids *); diff --git a/revision.c b/revision.c index 15873bf24d..8a29cb03c5 100644 --- a/revision.c +++ b/revision.c @@ -846,7 +846,7 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs) */ if (left_first != !!(flags & SYMMETRIC_LEFT)) continue; - commit->util = add_commit_patch_id(commit, &ids); + add_commit_patch_id(commit, &ids); } /* either cherry_mark or cherry_pick are true */ @@ -873,21 +873,9 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs) id = has_commit_patch_id(commit, &ids); if (!id) continue; - id->seen = 1; - commit->object.flags |= cherry_flag; - } - /* Now check the original side for seen ones */ - for (p = list; p; p = p->next) { - struct commit *commit = p->item; - struct patch_id *ent; - - ent = commit->util; - if (!ent) - continue; - if (ent->seen) - commit->object.flags |= cherry_flag; - commit->util = NULL; + commit->object.flags |= cherry_flag; + id->commit->object.flags |= cherry_flag; } free_patch_ids(&ids); diff --git a/t/perf/p3400-rebase.sh b/t/perf/p3400-rebase.sh new file mode 100644 index 0000000000..b3e7d525d2 --- /dev/null +++ b/t/perf/p3400-rebase.sh @@ -0,0 +1,36 @@ +#!/bin/sh + +test_description='Tests rebase performance' +. ./perf-lib.sh + +test_perf_default_repo + +test_expect_success 'setup' ' + git checkout -f -b base && + git checkout -b to-rebase && + git checkout -b upstream && + for i in $(seq 100) + do + # simulate huge diffs + echo change$i >unrelated-file$i && + seq 1000 >>unrelated-file$i && + git add unrelated-file$i && + test_tick && + git commit -m commit$i unrelated-file$i && + echo change$i >unrelated-file$i && + seq 1000 | tac >>unrelated-file$i && + git add unrelated-file$i && + test_tick && + git commit -m commit$i-reverse unrelated-file$i || + break + done && + git checkout to-rebase && + test_commit our-patch interesting-file +' + +test_perf 'rebase on top of a lot of unrelated changes' ' + git rebase --onto upstream HEAD^ && + git rebase --onto base HEAD^ +' + +test_done diff --git a/t/t6007-rev-list-cherry-pick-file.sh b/t/t6007-rev-list-cherry-pick-file.sh index 28d4f6b259..1408b608eb 100755 --- a/t/t6007-rev-list-cherry-pick-file.sh +++ b/t/t6007-rev-list-cherry-pick-file.sh @@ -207,4 +207,25 @@ test_expect_success '--count --left-right' ' test_cmp expect actual ' +# Corrupt the object store deliberately to make sure +# the object is not even checked for its existence. +remove_loose_object () { + sha1="$(git rev-parse "$1")" && + remainder=${sha1#??} && + firsttwo=${sha1%$remainder} && + rm .git/objects/$firsttwo/$remainder +} + +test_expect_success '--cherry-pick avoids looking at full diffs' ' + git checkout -b shy-diff && + test_commit dont-look-at-me && + echo Hello >dont-look-at-me.t && + test_tick && + git commit -m tip dont-look-at-me.t && + git checkout -b mainline HEAD^ && + test_commit to-cherry-pick && + remove_loose_object shy-diff^:dont-look-at-me.t && + git rev-list --cherry-pick ...shy-diff +' + test_done |