From cd3799679533328dcf262549c9d6466b07628df1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 29 Jul 2016 00:10:31 -0400 Subject: pack-objects: break out of want_object loop early When pack-objects collects the list of objects to pack (either from stdin, or via its internal rev-list), it filters each one through want_object_in_pack(). This function loops through each existing packfile, looking for the object. When we find it, we mark the pack/offset combo for later use. However, we can't just return "yes, we want it" at that point. If --honor-pack-keep is in effect, we must keep looking to find it in _all_ packs, to make sure none of them has a .keep. Likewise, if --local is in effect, we must make sure it is not present in any non-local pack. As a result, the sum effort of these calls is effectively O(nr_objects * nr_packs). In an ordinary repository, we have only a handful of packs, and this doesn't make a big difference. But in pathological cases, it can slow the counting phase to a crawl. This patch notices the case that we have neither "--local" nor "--honor-pack-keep" in effect and breaks out of the loop early, after finding the first instance. Note that our worst case is still "objects * packs" (i.e., we might find each object in the last pack we look in), but in practice we will often break out early. On an "average" repo, my git.git with 8 packs, this shows a modest 2% (a few dozen milliseconds) improvement in the counting-objects phase of "git pack-objects --all Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index a2f8cfdec0..8ad11f2110 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -977,6 +977,22 @@ static int want_object_in_pack(const unsigned char *sha1, return 1; if (incremental) return 0; + + /* + * When asked to do --local (do not include an + * object that appears in a pack we borrow + * from elsewhere) or --honor-pack-keep (do not + * include an object that appears in a pack marked + * with .keep), we need to make sure no copy of this + * object come from in _any_ pack that causes us to + * omit it, and need to complete this loop. When + * neither option is in effect, we know the object + * we just found is going to be packed, so break + * out of the loop to return 1 now. + */ + if (!ignore_packed_keep && !local) + break; + if (local && !p->pack_local) return 0; if (ignore_packed_keep && p->pack_local && p->pack_keep) -- cgit v1.2.1 From 56dfeb62638760fa78a442a97f19abf1af374d29 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 29 Jul 2016 00:11:31 -0400 Subject: pack-objects: compute local/ignore_pack_keep early In want_object_in_pack(), we can exit early from our loop if neither "local" nor "ignore_pack_keep" are set. If they are, however, we must examine each pack to see if it has the object and is non-local or has a ".keep". It's quite common for there to be no non-local or .keep packs at all, in which case we know ahead of time that looking further will be pointless. We can pre-compute this by simply iterating over the list of packs ahead of time, and dropping the flags if there are no packs that could match. Another similar strategy would be to modify the loop in want_object_in_pack() to notice that we have already found the object once, and that we are looping only to check for "local" and "keep" attributes. If a pack has neither of those, we can skip the call to find_pack_entry_one(), which is the expensive part of the loop. This has two advantages: - it isn't all-or-nothing; we still get some improvement when there's a small number of kept or non-local packs, and a large number of non-kept local packs - it eliminates any possible race where we add new non-local or kept packs after our initial scan. In practice, I don't think this race matters; we already cache the packed_git information, so somebody who adds a new pack or .keep file after we've started will not be noticed at all, unless we happen to need to call reprepare_packed_git() because a lookup fails. In other words, we're already racy, and the race is not a big deal (losing the race means we might include an object in the pack that would not otherwise be, which is an acceptable outcome). However, it also has a disadvantage: we still loop over the rest of the packs for each object to check their flags. This is much less expensive than doing the object lookup, but still not free. So if we wanted to implement that strategy to cover the non-all-or-nothing cases, we could do so in addition to this one (so you get the most speedup in the all-or-nothing case, and the best we can do in the other cases). But given that the all-or-nothing case is likely the most common, it is probably not worth the trouble, and we can revisit this later if evidence points otherwise. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8ad11f2110..c4c2a3c79d 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -46,6 +46,7 @@ static int keep_unreachable, unpack_unreachable, include_tag; static unsigned long unpack_unreachable_expiration; static int pack_loose_unreachable; static int local; +static int have_non_local_packs; static int incremental; static int ignore_packed_keep; static int allow_ofs_delta; @@ -990,7 +991,8 @@ static int want_object_in_pack(const unsigned char *sha1, * we just found is going to be packed, so break * out of the loop to return 1 now. */ - if (!ignore_packed_keep && !local) + if (!ignore_packed_keep && + (!local || !have_non_local_packs)) break; if (local && !p->pack_local) @@ -2799,6 +2801,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) progress = 2; prepare_packed_git(); + if (ignore_packed_keep) { + struct packed_git *p; + for (p = packed_git; p; p = p->next) + if (p->pack_local && p->pack_keep) + break; + if (!p) /* no keep-able packs found */ + ignore_packed_keep = 0; + } + if (local) { + /* + * unlike ignore_packed_keep above, we do not want to + * unset "local" based on looking at packs, as it + * also covers non-local objects + */ + struct packed_git *p; + for (p = packed_git; p; p = p->next) { + if (!p->pack_local) { + have_non_local_packs = 1; + break; + } + } + } if (progress) progress_state = start_progress(_("Counting objects"), 0); -- cgit v1.2.1