diff options
author | Junio C Hamano <gitster@pobox.com> | 2015-03-28 09:33:06 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2015-03-28 09:33:06 -0700 |
commit | 9f389aa4920f147a6314719741b47074b2e4b727 (patch) | |
tree | edf7abf348e4b2ba4fe128801346357c512ce173 | |
parent | b37996e251872891f71b833c425acd05cf263547 (diff) | |
parent | ea56c4e02fc1d5ce6a6b5083c284e32ffc6367e6 (diff) | |
download | git-9f389aa4920f147a6314719741b47074b2e4b727.tar.gz |
Merge branch 'jk/prune-with-corrupt-refs' into maint
"git prune" used to largely ignore broken refs when deciding which
objects are still being used, which could spread an existing small
damage and make it a larger one.
* jk/prune-with-corrupt-refs:
refs.c: drop curate_packed_refs
repack: turn on "ref paranoia" when doing a destructive repack
prune: turn on ref_paranoia flag
refs: introduce a "ref paranoia" flag
t5312: test object deletion code paths in a corrupted repository
-rw-r--r-- | Documentation/git.txt | 11 | ||||
-rw-r--r-- | builtin/prune.c | 1 | ||||
-rw-r--r-- | builtin/repack.c | 8 | ||||
-rw-r--r-- | cache.h | 8 | ||||
-rw-r--r-- | environment.c | 1 | ||||
-rw-r--r-- | refs.c | 72 | ||||
-rwxr-xr-x | t/t5312-prune-corruption.sh | 114 |
7 files changed, 147 insertions, 68 deletions
diff --git a/Documentation/git.txt b/Documentation/git.txt index 9c75617051..a2596dc734 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -1027,6 +1027,17 @@ GIT_ICASE_PATHSPECS:: variable when it is invoked as the top level command by the end user, to be recorded in the body of the reflog. +`GIT_REF_PARANOIA`:: + If set to `1`, include broken or badly named refs when iterating + over lists of refs. In a normal, non-corrupted repository, this + does nothing. However, enabling it may help git to detect and + abort some operations in the presence of broken refs. Git sets + this variable automatically when performing destructive + operations like linkgit:git-prune[1]. You should not need to set + it yourself unless you want to be paranoid about making sure + an operation has touched every ref (e.g., because you are + cloning a repository to make a backup). + Discussion[[Discussion]] ------------------------ diff --git a/builtin/prune.c b/builtin/prune.c index 04d3b12ae4..17094ad954 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -115,6 +115,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix) expire = ULONG_MAX; save_commit_buffer = 0; check_replace_refs = 0; + ref_paranoia = 1; init_revisions(&revs, prefix); argc = parse_options(argc, argv, prefix, options, prune_usage, 0); diff --git a/builtin/repack.c b/builtin/repack.c index 3f852f35d1..2fe1b30d71 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -228,13 +228,17 @@ int cmd_repack(int argc, const char **argv, const char *prefix) get_non_kept_pack_filenames(&existing_packs); if (existing_packs.nr && delete_redundant) { - if (unpack_unreachable) + if (unpack_unreachable) { argv_array_pushf(&cmd.args, "--unpack-unreachable=%s", unpack_unreachable); - else if (pack_everything & LOOSEN_UNREACHABLE) + argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1"); + } else if (pack_everything & LOOSEN_UNREACHABLE) { argv_array_push(&cmd.args, "--unpack-unreachable"); + } else { + argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1"); + } } } else { argv_array_push(&cmd.args, "--unpacked"); @@ -614,6 +614,14 @@ extern int protect_hfs; extern int protect_ntfs; /* + * Include broken refs in all ref iterations, which will + * generally choke dangerous operations rather than letting + * them silently proceed without taking the broken ref into + * account. + */ +extern int ref_paranoia; + +/* * The character that begins a commented line in user-editable file * that is subject to stripspace. */ diff --git a/environment.c b/environment.c index 1ade5c9684..a40044c3bf 100644 --- a/environment.c +++ b/environment.c @@ -24,6 +24,7 @@ int is_bare_repository_cfg = -1; /* unspecified */ int log_all_ref_updates = -1; /* unspecified */ int warn_ambiguous_refs = 1; int warn_on_object_refname_ambiguity = 1; +int ref_paranoia = -1; int repository_format_version; const char *git_commit_encoding; const char *git_log_output_encoding; @@ -1907,6 +1907,11 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base, data.fn = fn; data.cb_data = cb_data; + if (ref_paranoia < 0) + ref_paranoia = git_env_bool("GIT_REF_PARANOIA", 0); + if (ref_paranoia) + data.flags |= DO_FOR_EACH_INCLUDE_BROKEN; + return do_for_each_entry(refs, base, do_one_ref, &data); } @@ -2591,68 +2596,10 @@ int pack_refs(unsigned int flags) return 0; } -/* - * If entry is no longer needed in packed-refs, add it to the string - * list pointed to by cb_data. Reasons for deleting entries: - * - * - Entry is broken. - * - Entry is overridden by a loose ref. - * - Entry does not point at a valid object. - * - * In the first and third cases, also emit an error message because these - * are indications of repository corruption. - */ -static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data) -{ - struct string_list *refs_to_delete = cb_data; - - if (entry->flag & REF_ISBROKEN) { - /* This shouldn't happen to packed refs. */ - error("%s is broken!", entry->name); - string_list_append(refs_to_delete, entry->name); - return 0; - } - if (!has_sha1_file(entry->u.value.sha1)) { - unsigned char sha1[20]; - int flags; - - if (read_ref_full(entry->name, 0, sha1, &flags)) - /* We should at least have found the packed ref. */ - die("Internal error"); - if ((flags & REF_ISSYMREF) || !(flags & REF_ISPACKED)) { - /* - * This packed reference is overridden by a - * loose reference, so it is OK that its value - * is no longer valid; for example, it might - * refer to an object that has been garbage - * collected. For this purpose we don't even - * care whether the loose reference itself is - * invalid, broken, symbolic, etc. Silently - * remove the packed reference. - */ - string_list_append(refs_to_delete, entry->name); - return 0; - } - /* - * There is no overriding loose reference, so the fact - * that this reference doesn't refer to a valid object - * indicates some kind of repository corruption. - * Report the problem, then omit the reference from - * the output. - */ - error("%s does not point to a valid object!", entry->name); - string_list_append(refs_to_delete, entry->name); - return 0; - } - - return 0; -} - int repack_without_refs(struct string_list *refnames, struct strbuf *err) { struct ref_dir *packed; - struct string_list refs_to_delete = STRING_LIST_INIT_DUP; - struct string_list_item *refname, *ref_to_delete; + struct string_list_item *refname; int ret, needs_repacking = 0, removed = 0; assert(err); @@ -2688,13 +2635,6 @@ int repack_without_refs(struct string_list *refnames, struct strbuf *err) return 0; } - /* Remove any other accumulated cruft */ - do_for_each_entry_in_dir(packed, 0, curate_packed_ref_fn, &refs_to_delete); - for_each_string_list_item(ref_to_delete, &refs_to_delete) { - if (remove_entry(packed, ref_to_delete->string) == -1) - die("internal error"); - } - /* Write what remains */ ret = commit_packed_refs(); if (ret) diff --git a/t/t5312-prune-corruption.sh b/t/t5312-prune-corruption.sh new file mode 100755 index 0000000000..8e98b44083 --- /dev/null +++ b/t/t5312-prune-corruption.sh @@ -0,0 +1,114 @@ +#!/bin/sh + +test_description=' +Test pruning of repositories with minor corruptions. The goal +here is that we should always be erring on the side of safety. So +if we see, for example, a ref with a bogus name, it is OK either to +bail out or to proceed using it as a reachable tip, but it is _not_ +OK to proceed as if it did not exist. Otherwise we might silently +delete objects that cannot be recovered. +' +. ./test-lib.sh + +test_expect_success 'disable reflogs' ' + git config core.logallrefupdates false && + rm -rf .git/logs +' + +test_expect_success 'create history reachable only from a bogus-named ref' ' + test_tick && git commit --allow-empty -m master && + base=$(git rev-parse HEAD) && + test_tick && git commit --allow-empty -m bogus && + bogus=$(git rev-parse HEAD) && + git cat-file commit $bogus >saved && + echo $bogus >.git/refs/heads/bogus..name && + git reset --hard HEAD^ +' + +test_expect_success 'pruning does not drop bogus object' ' + test_when_finished "git hash-object -w -t commit saved" && + test_might_fail git prune --expire=now && + verbose git cat-file -e $bogus +' + +test_expect_success 'put bogus object into pack' ' + git tag reachable $bogus && + git repack -ad && + git tag -d reachable && + verbose git cat-file -e $bogus +' + +test_expect_success 'destructive repack keeps packed object' ' + test_might_fail git repack -Ad --unpack-unreachable=now && + verbose git cat-file -e $bogus && + test_might_fail git repack -ad && + verbose git cat-file -e $bogus +' + +# subsequent tests will have different corruptions +test_expect_success 'clean up bogus ref' ' + rm .git/refs/heads/bogus..name +' + +# We create two new objects here, "one" and "two". Our +# master branch points to "two", which is deleted, +# corrupting the repository. But we'd like to make sure +# that the otherwise unreachable "one" is not pruned +# (since it is the user's best bet for recovering +# from the corruption). +# +# Note that we also point HEAD somewhere besides "two", +# as we want to make sure we test the case where we +# pick up the reference to "two" by iterating the refs, +# not by resolving HEAD. +test_expect_success 'create history with missing tip commit' ' + test_tick && git commit --allow-empty -m one && + recoverable=$(git rev-parse HEAD) && + git cat-file commit $recoverable >saved && + test_tick && git commit --allow-empty -m two && + missing=$(git rev-parse HEAD) && + git checkout --detach $base && + rm .git/objects/$(echo $missing | sed "s,..,&/,") && + test_must_fail git cat-file -e $missing +' + +test_expect_success 'pruning with a corrupted tip does not drop history' ' + test_when_finished "git hash-object -w -t commit saved" && + test_might_fail git prune --expire=now && + verbose git cat-file -e $recoverable +' + +test_expect_success 'pack-refs does not silently delete broken loose ref' ' + git pack-refs --all --prune && + echo $missing >expect && + git rev-parse refs/heads/master >actual && + test_cmp expect actual +' + +# we do not want to count on running pack-refs to +# actually pack it, as it is perfectly reasonable to +# skip processing a broken ref +test_expect_success 'create packed-refs file with broken ref' ' + rm -f .git/refs/heads/master && + cat >.git/packed-refs <<-EOF && + $missing refs/heads/master + $recoverable refs/heads/other + EOF + echo $missing >expect && + git rev-parse refs/heads/master >actual && + test_cmp expect actual +' + +test_expect_success 'pack-refs does not silently delete broken packed ref' ' + git pack-refs --all --prune && + git rev-parse refs/heads/master >actual && + test_cmp expect actual +' + +test_expect_success 'pack-refs does not drop broken refs during deletion' ' + git update-ref -d refs/heads/other && + git rev-parse refs/heads/master >actual && + test_cmp expect actual +' + +test_done |