From d0d46abc167d18fdbdbf2ece8ca6df704517c62f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:41:53 -0400 Subject: pack-objects: refactor unpack-unreachable expiration check When we are loosening unreachable packed objects, we do not bother to process objects that would simply be pruned immediately anyway. The "would be pruned" check is a simple comparison, but is about to get more complicated. Let's pull it out into a separate function. Note that this is slightly less efficient than the original, which avoided even opening old packs, since no object in them could pass the current check, which cares only about the pack mtime. But the new rules will depend on the exact object, so we need to perform the check even for old packs. Note also that we fix a minor buglet when the pack mtime is exactly the same as the expiration time. The prune code considers that worth pruning, whereas our check here considered it worth keeping. This wasn't a big deal. Besides being unlikely to happen, the result was simply that the object was loosened and then pruned, missing the optimization. Still, we can easily fix it while we are here. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index d39193453a..2fe2ab0603 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2407,6 +2407,16 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) return 0; } +static int loosened_object_can_be_discarded(const unsigned char *sha1, + unsigned long mtime) +{ + if (!unpack_unreachable_expiration) + return 0; + if (mtime > unpack_unreachable_expiration) + return 0; + return 1; +} + static void loosen_unused_packed_objects(struct rev_info *revs) { struct packed_git *p; @@ -2417,17 +2427,14 @@ static void loosen_unused_packed_objects(struct rev_info *revs) if (!p->pack_local || p->pack_keep) continue; - if (unpack_unreachable_expiration && - p->mtime < unpack_unreachable_expiration) - continue; - if (open_pack_index(p)) die("cannot open pack index"); for (i = 0; i < p->num_objects; i++) { sha1 = nth_packed_object_sha1(p, i); if (!packlist_find(&to_pack, sha1, NULL) && - !has_sha1_pack_kept_or_nonlocal(sha1)) + !has_sha1_pack_kept_or_nonlocal(sha1) && + !loosened_object_can_be_discarded(sha1, p->mtime)) if (force_object_loose(sha1, p->mtime)) die("unable to force loose object"); } -- cgit v1.2.1 From abcb86553d3ec4afffa4e3963089dffe0559740e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:42:09 -0400 Subject: pack-objects: match prune logic for discarding objects A recent commit taught git-prune to keep non-recent objects that are reachable from recent ones. However, pack-objects, when loosening unreachable objects, tries to optimize out the write in the case that the object will be immediately pruned. It now gets this wrong, since its rule does not reflect the new prune code (and this can be seen by running t6501 with a strategically placed repack). Let's teach pack-objects similar logic. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 2fe2ab0603..4df9499040 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -20,6 +20,8 @@ #include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" +#include "reachable.h" +#include "sha1-array.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"), @@ -2407,6 +2409,15 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) return 0; } +/* + * Store a list of sha1s that are should not be discarded + * because they are either written too recently, or are + * reachable from another object that was. + * + * This is filled by get_object_list. + */ +static struct sha1_array recent_objects; + static int loosened_object_can_be_discarded(const unsigned char *sha1, unsigned long mtime) { @@ -2414,6 +2425,8 @@ static int loosened_object_can_be_discarded(const unsigned char *sha1, return 0; if (mtime > unpack_unreachable_expiration) return 0; + if (sha1_array_lookup(&recent_objects, sha1) >= 0) + return 0; return 1; } @@ -2470,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs) return 0; } +static void record_recent_object(struct object *obj, + const struct name_path *path, + const char *last, + void *data) +{ + sha1_array_append(&recent_objects, obj->sha1); +} + +static void record_recent_commit(struct commit *commit, void *data) +{ + sha1_array_append(&recent_objects, commit->object.sha1); +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; @@ -2517,10 +2543,23 @@ static void get_object_list(int ac, const char **av) mark_edges_uninteresting(&revs, show_edge); traverse_commit_list(&revs, show_commit, show_object, NULL); + if (unpack_unreachable_expiration) { + revs.ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(&revs, + unpack_unreachable_expiration)) + die("unable to add recent objects"); + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + traverse_commit_list(&revs, record_recent_commit, + record_recent_object, NULL); + } + if (keep_unreachable) add_objects_in_unpacked_packs(&revs); if (unpack_unreachable) loosen_unused_packed_objects(&revs); + + sha1_array_clear(&recent_objects); } static int option_parse_index_version(const struct option *opt, -- cgit v1.2.1 From edfbb2aa538148b38ee0ba6d62c95b7edda5d817 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:35 -0400 Subject: pack-objects: use argv_array This saves us from having to bump the rp_av count when we add new traversal options. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4df9499040..b26276b424 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,6 +22,7 @@ #include "pack-bitmap.h" #include "reachable.h" #include "sha1-array.h" +#include "argv-array.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"), @@ -2614,8 +2615,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) int use_internal_rev_list = 0; int thin = 0; int all_progress_implied = 0; - const char *rp_av[6]; - int rp_ac = 0; + struct argv_array rp = ARGV_ARRAY_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; struct option pack_objects_options[] = { OPT_SET_INT('q', "quiet", &progress, @@ -2705,24 +2705,24 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (pack_to_stdout != !base_name || argc) usage_with_options(pack_usage, pack_objects_options); - rp_av[rp_ac++] = "pack-objects"; + argv_array_push(&rp, "pack-objects"); if (thin) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--objects-edge"; + argv_array_push(&rp, "--objects-edge"); } else - rp_av[rp_ac++] = "--objects"; + argv_array_push(&rp, "--objects"); if (rev_list_all) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--all"; + argv_array_push(&rp, "--all"); } if (rev_list_reflog) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--reflog"; + argv_array_push(&rp, "--reflog"); } if (rev_list_unpacked) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--unpacked"; + argv_array_push(&rp, "--unpacked"); } if (!reuse_object) @@ -2766,8 +2766,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!use_internal_rev_list) read_object_list_from_stdin(); else { - rp_av[rp_ac] = NULL; - get_object_list(rp_ac, rp_av); + get_object_list(rp.argc, rp.argv); + argv_array_clear(&rp); } cleanup_preferred_base(); if (include_tag && nr_result) -- cgit v1.2.1 From c90f9e13abae630551ada3e895633bdc2cf4e080 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:49 -0400 Subject: repack: pack objects mentioned by the index When we pack all objects, we use only the objects reachable from references and reflogs. This misses any objects which are reachable from the index, but not yet referenced. By itself this isn't a big deal; the objects can remain loose until they are actually used in a commit. However, it does create a problem when we drop packed but unreachable objects. We try to optimize out the writing of objects that we will immediately prune, which means we must follow the same rules as prune in determining what is reachable. And prune uses the index for this purpose. This is rather uncommon in practice, as objects in the index would not usually have been packed in the first place. But it could happen in a sequence like: 1. You make a commit on a branch that references blob X. 2. You repack, moving X into the pack. 3. You delete the branch (and its reflog), so that X is unreferenced. 4. You "git add" blob X so that it is now referenced only by the index. 5. You repack again with git-gc. The pack-objects we invoke will see that X is neither referenced nor recent and not bother loosening it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b26276b424..0cf95c9901 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2617,6 +2617,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) int all_progress_implied = 0; struct argv_array rp = ARGV_ARRAY_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; + int rev_list_index = 0; struct option pack_objects_options[] = { OPT_SET_INT('q', "quiet", &progress, N_("do not show progress meter"), 0), @@ -2663,6 +2664,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL, N_("include objects referred by reflog entries"), PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, + { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL, + N_("include objects referred to by the index"), + PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, OPT_BOOL(0, "stdout", &pack_to_stdout, N_("output pack to stdout")), OPT_BOOL(0, "include-tag", &include_tag, @@ -2720,6 +2724,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) use_internal_rev_list = 1; argv_array_push(&rp, "--reflog"); } + if (rev_list_index) { + use_internal_rev_list = 1; + argv_array_push(&rp, "--indexed-objects"); + } if (rev_list_unpacked) { use_internal_rev_list = 1; argv_array_push(&rp, "--unpacked"); -- cgit v1.2.1 From b1e757f36377df1f2a6c165ebf171b09a8ad957b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:54 -0400 Subject: pack-objects: double-check options before discarding objects When we are given an expiration time like --unpack-unreachable=2.weeks.ago, we avoid writing out old, unreachable loose objects entirely, under the assumption that running "prune" would simply delete them immediately anyway. However, this is only valid if we computed the same set of reachable objects as prune would. In practice, this is the case, because only git-repack uses the --unpack-unreachable option with an expiration, and it always feeds as many objects into the pack as possible. But we can double-check at runtime just to be sure. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 0cf95c9901..64123d4222 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2757,6 +2757,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (keep_unreachable && unpack_unreachable) die("--keep-unreachable and --unpack-unreachable are incompatible."); + if (!rev_list_all || !rev_list_reflog || !rev_list_index) + unpack_unreachable_expiration = 0; if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow()) use_bitmap_index = 0; -- cgit v1.2.1