diff options
author | Junio C Hamano <gitster@pobox.com> | 2018-02-13 13:39:03 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2018-02-13 13:39:03 -0800 |
commit | f3d618d2bf4099abe99babc8b56dcd483c5eec71 (patch) | |
tree | 3e906f2c4a72698cf5865016b5d652047d8260a0 | |
parent | ed1b87ef910fe38dfb9cf044f5c946adfab0c5e3 (diff) | |
parent | 0c16cd499dee09b7fc6dd10cb6a476e96c147ef6 (diff) | |
download | git-f3d618d2bf4099abe99babc8b56dcd483c5eec71.tar.gz |
Merge branch 'jh/fsck-promisors'
In preparation for implementing narrow/partial clone, the machinery
for checking object connectivity used by gc and fsck has been
taught that a missing object is OK when it is referenced by a
packfile specially marked as coming from trusted repository that
promises to make them available on-demand and lazily.
* jh/fsck-promisors:
gc: do not repack promisor packfiles
rev-list: support termination at promisor objects
sha1_file: support lazily fetching missing objects
introduce fetch-object: fetch one promisor object
index-pack: refactor writing of .keep files
fsck: support promisor objects as CLI argument
fsck: support referenced promisor objects
fsck: support refs pointing to promisor objects
fsck: introduce partialclone extension
extension.partialclone: introduce partial clone extension
-rw-r--r-- | Documentation/git-pack-objects.txt | 11 | ||||
-rw-r--r-- | Documentation/gitremote-helpers.txt | 7 | ||||
-rw-r--r-- | Documentation/rev-list-options.txt | 11 | ||||
-rw-r--r-- | Documentation/technical/repository-version.txt | 12 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | builtin/cat-file.c | 2 | ||||
-rw-r--r-- | builtin/fetch-pack.c | 10 | ||||
-rw-r--r-- | builtin/fsck.c | 26 | ||||
-rw-r--r-- | builtin/gc.c | 3 | ||||
-rw-r--r-- | builtin/index-pack.c | 111 | ||||
-rw-r--r-- | builtin/pack-objects.c | 37 | ||||
-rw-r--r-- | builtin/prune.c | 7 | ||||
-rw-r--r-- | builtin/repack.c | 8 | ||||
-rw-r--r-- | builtin/rev-list.c | 69 | ||||
-rw-r--r-- | cache.h | 13 | ||||
-rw-r--r-- | environment.c | 1 | ||||
-rw-r--r-- | fetch-object.c | 27 | ||||
-rw-r--r-- | fetch-object.h | 6 | ||||
-rw-r--r-- | fetch-pack.c | 48 | ||||
-rw-r--r-- | fetch-pack.h | 8 | ||||
-rw-r--r-- | list-objects.c | 29 | ||||
-rw-r--r-- | object.c | 2 | ||||
-rw-r--r-- | packfile.c | 77 | ||||
-rw-r--r-- | packfile.h | 13 | ||||
-rw-r--r-- | remote-curl.c | 14 | ||||
-rw-r--r-- | revision.c | 33 | ||||
-rw-r--r-- | revision.h | 5 | ||||
-rw-r--r-- | setup.c | 7 | ||||
-rw-r--r-- | sha1_file.c | 32 | ||||
-rwxr-xr-x | t/t0410-partial-clone.sh | 343 | ||||
-rw-r--r-- | transport.c | 8 | ||||
-rw-r--r-- | transport.h | 11 |
32 files changed, 896 insertions, 96 deletions
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index aa403d02f3..81bc490ac5 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -255,6 +255,17 @@ a missing object is encountered. This is the default action. The form '--missing=allow-any' will allow object traversal to continue if a missing object is encountered. Missing objects will silently be omitted from the results. ++ +The form '--missing=allow-promisor' is like 'allow-any', but will only +allow object traversal to continue for EXPECTED promisor missing objects. +Unexpected missing object will raise an error. + +--exclude-promisor-objects:: + Omit objects that are known to be in the promisor remote. (This + option has the purpose of operating only on locally created objects, + so that when we repack, we still maintain a distinction between + locally created objects [without .promisor] and objects from the + promisor remote [with .promisor].) This is used with partial clone. SEE ALSO -------- diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt index 4a584f3c5d..4b8c93ec59 100644 --- a/Documentation/gitremote-helpers.txt +++ b/Documentation/gitremote-helpers.txt @@ -466,6 +466,13 @@ set by Git if the remote helper has the 'option' capability. Transmit <string> as a push option. As the push option must not contain LF or NUL characters, the string is not encoded. +'option from-promisor' {'true'|'false'}:: + Indicate that these objects are being fetched from a promisor. + +'option no-dependents' {'true'|'false'}:: + Indicate that only the objects wanted need to be fetched, not + their dependents. + SEE ALSO -------- linkgit:git-remote[1] diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 22f5c9b43d..7b273635de 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -750,10 +750,21 @@ The form '--missing=allow-any' will allow object traversal to continue if a missing object is encountered. Missing objects will silently be omitted from the results. + +The form '--missing=allow-promisor' is like 'allow-any', but will only +allow object traversal to continue for EXPECTED promisor missing objects. +Unexpected missing objects will raise an error. ++ The form '--missing=print' is like 'allow-any', but will also print a list of the missing objects. Object IDs are prefixed with a ``?'' character. endif::git-rev-list[] +--exclude-promisor-objects:: + (For internal use only.) Prefilter object traversal at + promisor boundary. This is used with partial clone. This is + stronger than `--missing=allow-promisor` because it limits the + traversal, rather than just silencing errors about missing + objects. + --no-walk[=(sorted|unsorted)]:: Only show the given commits, but do not traverse their ancestors. This has no effect if a range is specified. If the argument diff --git a/Documentation/technical/repository-version.txt b/Documentation/technical/repository-version.txt index 00ad37986e..e03eaccebc 100644 --- a/Documentation/technical/repository-version.txt +++ b/Documentation/technical/repository-version.txt @@ -86,3 +86,15 @@ for testing format-1 compatibility. When the config key `extensions.preciousObjects` is set to `true`, objects in the repository MUST NOT be deleted (e.g., by `git-prune` or `git repack -d`). + +`partialclone` +~~~~~~~~~~~~~~ + +When the config key `extensions.partialclone` is set, it indicates +that the repo was created with a partial clone (or later performed +a partial fetch) and that the remote may have omitted sending +certain unwanted objects. Such a remote is called a "promisor remote" +and it promises that all such omitted objects can be fetched from it +in the future. + +The value of this key is the name of the promisor remote. @@ -802,6 +802,7 @@ LIB_OBJS += ewah/ewah_bitmap.o LIB_OBJS += ewah/ewah_io.o LIB_OBJS += ewah/ewah_rlw.o LIB_OBJS += exec_cmd.o +LIB_OBJS += fetch-object.o LIB_OBJS += fetch-pack.o LIB_OBJS += fsck.o LIB_OBJS += fsmonitor.o diff --git a/builtin/cat-file.c b/builtin/cat-file.c index f5fa4fd75a..cf9ea5c796 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -475,6 +475,8 @@ static int batch_objects(struct batch_options *opt) for_each_loose_object(batch_loose_object, &sa, 0); for_each_packed_object(batch_packed_object, &sa, 0); + if (repository_format_partial_clone) + warning("This repository has extensions.partialClone set. Some objects may not be loaded."); cb.opt = opt; cb.expand = &data; diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 366b9d13f9..15eeed7b17 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -53,6 +53,8 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) struct oid_array shallow = OID_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; + fetch_if_missing = 0; + packet_trace_identity("fetch-pack"); memset(&args, 0, sizeof(args)); @@ -143,6 +145,14 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.update_shallow = 1; continue; } + if (!strcmp("--from-promisor", arg)) { + args.from_promisor = 1; + continue; + } + if (!strcmp("--no-dependents", arg)) { + args.no_dependents = 1; + continue; + } usage(fetch_pack_usage); } if (deepen_not.nr) diff --git a/builtin/fsck.c b/builtin/fsck.c index 04846d46f9..7a8a679d4f 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -149,6 +149,15 @@ static int mark_object(struct object *obj, int type, void *data, struct fsck_opt if (obj->flags & REACHABLE) return 0; obj->flags |= REACHABLE; + + if (is_promisor_object(&obj->oid)) + /* + * Further recursion does not need to be performed on this + * object since it is a promisor object (so it does not need to + * be added to "pending"). + */ + return 0; + if (!(obj->flags & HAS_OBJ)) { if (parent && !has_object_file(&obj->oid)) { printf("broken link from %7s %s\n", @@ -208,6 +217,8 @@ static void check_reachable_object(struct object *obj) * do a full fsck */ if (!(obj->flags & HAS_OBJ)) { + if (is_promisor_object(&obj->oid)) + return; if (has_sha1_pack(obj->oid.hash)) return; /* it is in pack - forget about it */ printf("missing %s %s\n", printable_type(obj), @@ -398,7 +409,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid, xstrfmt("%s@{%"PRItime"}", refname, timestamp)); obj->flags |= USED; mark_object_reachable(obj); - } else { + } else if (!is_promisor_object(oid)) { error("%s: invalid reflog entry %s", refname, oid_to_hex(oid)); errors_found |= ERROR_REACHABLE; } @@ -434,6 +445,14 @@ static int fsck_handle_ref(const char *refname, const struct object_id *oid, obj = parse_object(oid); if (!obj) { + if (is_promisor_object(oid)) { + /* + * Increment default_refs anyway, because this is a + * valid ref. + */ + default_refs++; + return 0; + } error("%s: invalid sha1 pointer %s", refname, oid_to_hex(oid)); errors_found |= ERROR_REACHABLE; /* We'll continue with the rest despite the error.. */ @@ -659,6 +678,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) int i; struct alternate_object_database *alt; + /* fsck knows how to handle missing promisor objects */ + fetch_if_missing = 0; + errors_found = 0; check_replace_refs = 0; @@ -731,6 +753,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) struct object *obj = lookup_object(oid.hash); if (!obj || !(obj->flags & HAS_OBJ)) { + if (is_promisor_object(&oid)) + continue; error("%s: object missing", oid_to_hex(&oid)); errors_found |= ERROR_OBJECT; continue; diff --git a/builtin/gc.c b/builtin/gc.c index 3c5eae0edf..77fa720bd0 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -458,6 +458,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) argv_array_push(&prune, prune_expire); if (quiet) argv_array_push(&prune, "--no-progress"); + if (repository_format_partial_clone) + argv_array_push(&prune, + "--exclude-promisor-objects"); if (run_command_v_opt(prune.argv, RUN_GIT_CMD)) return error(FAILED_RUN, prune.argv[0]); } diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 4c51aec81f..5ebd370c56 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1389,15 +1389,60 @@ static void fix_unresolved_deltas(struct sha1file *f) free(sorted_by_pos); } +static const char *derive_filename(const char *pack_name, const char *suffix, + struct strbuf *buf) +{ + size_t len; + if (!strip_suffix(pack_name, ".pack", &len)) + die(_("packfile name '%s' does not end with '.pack'"), + pack_name); + strbuf_add(buf, pack_name, len); + strbuf_addch(buf, '.'); + strbuf_addstr(buf, suffix); + return buf->buf; +} + +static void write_special_file(const char *suffix, const char *msg, + const char *pack_name, const unsigned char *sha1, + const char **report) +{ + struct strbuf name_buf = STRBUF_INIT; + const char *filename; + int fd; + int msg_len = strlen(msg); + + if (pack_name) + filename = derive_filename(pack_name, suffix, &name_buf); + else + filename = odb_pack_name(&name_buf, sha1, suffix); + + fd = odb_pack_keep(filename); + if (fd < 0) { + if (errno != EEXIST) + die_errno(_("cannot write %s file '%s'"), + suffix, filename); + } else { + if (msg_len > 0) { + write_or_die(fd, msg, msg_len); + write_or_die(fd, "\n", 1); + } + if (close(fd) != 0) + die_errno(_("cannot close written %s file '%s'"), + suffix, filename); + if (report) + *report = suffix; + } + strbuf_release(&name_buf); +} + static void final(const char *final_pack_name, const char *curr_pack_name, const char *final_index_name, const char *curr_index_name, - const char *keep_name, const char *keep_msg, + const char *keep_msg, const char *promisor_msg, unsigned char *sha1) { const char *report = "pack"; struct strbuf pack_name = STRBUF_INIT; struct strbuf index_name = STRBUF_INIT; - struct strbuf keep_name_buf = STRBUF_INIT; int err; if (!from_stdin) { @@ -1409,28 +1454,12 @@ static void final(const char *final_pack_name, const char *curr_pack_name, die_errno(_("error while closing pack file")); } - if (keep_msg) { - int keep_fd, keep_msg_len = strlen(keep_msg); - - if (!keep_name) - keep_name = odb_pack_name(&keep_name_buf, sha1, "keep"); - - keep_fd = odb_pack_keep(keep_name); - if (keep_fd < 0) { - if (errno != EEXIST) - die_errno(_("cannot write keep file '%s'"), - keep_name); - } else { - if (keep_msg_len > 0) { - write_or_die(keep_fd, keep_msg, keep_msg_len); - write_or_die(keep_fd, "\n", 1); - } - if (close(keep_fd) != 0) - die_errno(_("cannot close written keep file '%s'"), - keep_name); - report = "keep"; - } - } + if (keep_msg) + write_special_file("keep", keep_msg, final_pack_name, sha1, + &report); + if (promisor_msg) + write_special_file("promisor", promisor_msg, final_pack_name, + sha1, NULL); if (final_pack_name != curr_pack_name) { if (!final_pack_name) @@ -1472,7 +1501,6 @@ static void final(const char *final_pack_name, const char *curr_pack_name, strbuf_release(&index_name); strbuf_release(&pack_name); - strbuf_release(&keep_name_buf); } static int git_index_pack_config(const char *k, const char *v, void *cb) @@ -1615,32 +1643,26 @@ static void show_pack_info(int stat_only) } } -static const char *derive_filename(const char *pack_name, const char *suffix, - struct strbuf *buf) -{ - size_t len; - if (!strip_suffix(pack_name, ".pack", &len)) - die(_("packfile name '%s' does not end with '.pack'"), - pack_name); - strbuf_add(buf, pack_name, len); - strbuf_addstr(buf, suffix); - return buf->buf; -} - int cmd_index_pack(int argc, const char **argv, const char *prefix) { int i, fix_thin_pack = 0, verify = 0, stat_only = 0; const char *curr_index; const char *index_name = NULL, *pack_name = NULL; - const char *keep_name = NULL, *keep_msg = NULL; - struct strbuf index_name_buf = STRBUF_INIT, - keep_name_buf = STRBUF_INIT; + const char *keep_msg = NULL; + const char *promisor_msg = NULL; + struct strbuf index_name_buf = STRBUF_INIT; struct pack_idx_entry **idx_objects; struct pack_idx_option opts; unsigned char pack_sha1[20]; unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */ int report_end_of_input = 0; + /* + * index-pack never needs to fetch missing objects, since it only + * accesses the repo to do hash collision checks + */ + fetch_if_missing = 0; + if (argc == 2 && !strcmp(argv[1], "-h")) usage(index_pack_usage); @@ -1678,6 +1700,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) stat_only = 1; } else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) { ; /* nothing to do */ + } else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) { + ; /* already parsed */ } else if (starts_with(arg, "--threads=")) { char *end; nr_threads = strtoul(arg+10, &end, 0); @@ -1740,9 +1764,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (from_stdin && !startup_info->have_repository) die(_("--stdin requires a git repository")); if (!index_name && pack_name) - index_name = derive_filename(pack_name, ".idx", &index_name_buf); - if (keep_msg && !keep_name && pack_name) - keep_name = derive_filename(pack_name, ".keep", &keep_name_buf); + index_name = derive_filename(pack_name, "idx", &index_name_buf); if (verify) { if (!index_name) @@ -1790,13 +1812,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) if (!verify) final(pack_name, curr_pack, index_name, curr_index, - keep_name, keep_msg, + keep_msg, promisor_msg, pack_sha1); else close(input_fd); free(objects); strbuf_release(&index_name_buf); - strbuf_release(&keep_name_buf); if (pack_name == NULL) free((void *) curr_pack); if (index_name == NULL) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 6b9cfc289d..6c71552cdf 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -75,6 +75,8 @@ static int use_bitmap_index = -1; static int write_bitmap_index; static uint16_t write_bitmap_options; +static int exclude_promisor_objects; + static unsigned long delta_cache_size = 0; static unsigned long max_delta_cache_size = 256 * 1024 * 1024; static unsigned long cache_max_small_delta_size = 1000; @@ -84,8 +86,9 @@ static unsigned long window_memory_limit = 0; static struct list_objects_filter_options filter_options; enum missing_action { - MA_ERROR = 0, /* fail if any missing objects are encountered */ - MA_ALLOW_ANY, /* silently allow ALL missing objects */ + MA_ERROR = 0, /* fail if any missing objects are encountered */ + MA_ALLOW_ANY, /* silently allow ALL missing objects */ + MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */ }; static enum missing_action arg_missing_action; static show_object_fn fn_show_object; @@ -2578,6 +2581,20 @@ static void show_object__ma_allow_any(struct object *obj, const char *name, void show_object(obj, name, data); } +static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data) +{ + assert(arg_missing_action == MA_ALLOW_PROMISOR); + + /* + * Quietly ignore EXPECTED missing objects. This avoids problems with + * staging them now and getting an odd error later. + */ + if (!has_object_file(&obj->oid) && is_promisor_object(&obj->oid)) + return; + + show_object(obj, name, data); +} + static int option_parse_missing_action(const struct option *opt, const char *arg, int unset) { @@ -2592,10 +2609,18 @@ static int option_parse_missing_action(const struct option *opt, if (!strcmp(arg, "allow-any")) { arg_missing_action = MA_ALLOW_ANY; + fetch_if_missing = 0; fn_show_object = show_object__ma_allow_any; return 0; } + if (!strcmp(arg, "allow-promisor")) { + arg_missing_action = MA_ALLOW_PROMISOR; + fetch_if_missing = 0; + fn_show_object = show_object__ma_allow_promisor; + return 0; + } + die(_("invalid value for --missing")); return 0; } @@ -3009,6 +3034,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { OPTION_CALLBACK, 0, "missing", NULL, N_("action"), N_("handling for missing objects"), PARSE_OPT_NONEG, option_parse_missing_action }, + OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects, + N_("do not pack objects in promisor packfiles")), OPT_END(), }; @@ -3054,6 +3081,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) argv_array_push(&rp, "--unpacked"); } + if (exclude_promisor_objects) { + use_internal_rev_list = 1; + fetch_if_missing = 0; + argv_array_push(&rp, "--exclude-promisor-objects"); + } + if (!reuse_object) reuse_delta = 0; if (pack_compression_level == -1) diff --git a/builtin/prune.c b/builtin/prune.c index d2fdae680a..4cfec82f40 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -101,12 +101,15 @@ int cmd_prune(int argc, const char **argv, const char *prefix) { struct rev_info revs; struct progress *progress = NULL; + int exclude_promisor_objects = 0; const struct option options[] = { OPT__DRY_RUN(&show_only, N_("do not remove, show only")), OPT__VERBOSE(&verbose, N_("report pruned objects")), OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_EXPIRY_DATE(0, "expire", &expire, N_("expire objects older than <time>")), + OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects, + N_("limit traversal to objects outside promisor packfiles")), OPT_END() }; char *s; @@ -139,6 +142,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix) show_progress = isatty(2); if (show_progress) progress = start_delayed_progress(_("Checking connectivity"), 0); + if (exclude_promisor_objects) { + fetch_if_missing = 0; + revs.exclude_promisor_objects = 1; + } mark_reachable_objects(&revs, 1, expire, progress); stop_progress(&progress); diff --git a/builtin/repack.c b/builtin/repack.c index f17a68a17d..7bdb40142f 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -83,7 +83,8 @@ static void remove_pack_on_signal(int signo) /* * Adds all packs hex strings to the fname list, which do not - * have a corresponding .keep file. + * have a corresponding .keep or .promisor file. These packs are not to + * be kept if we are going to pack everything into one file. */ static void get_non_kept_pack_filenames(struct string_list *fname_list) { @@ -101,7 +102,8 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list) fname = xmemdupz(e->d_name, len); - if (!file_exists(mkpath("%s/%s.keep", packdir, fname))) + if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) && + !file_exists(mkpath("%s/%s.promisor", packdir, fname))) string_list_append_nodup(fname_list, fname); else free(fname); @@ -232,6 +234,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) argv_array_push(&cmd.args, "--all"); argv_array_push(&cmd.args, "--reflog"); argv_array_push(&cmd.args, "--indexed-objects"); + if (repository_format_partial_clone) + argv_array_push(&cmd.args, "--exclude-promisor-objects"); if (window) argv_array_pushf(&cmd.args, "--window=%s", window); if (window_memory) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index d5345b6a2e..e27aa1fc07 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -15,6 +15,7 @@ #include "progress.h" #include "reflog-walk.h" #include "oidset.h" +#include "packfile.h" static const char rev_list_usage[] = "git rev-list [OPTION] <commit-id>... [ -- paths... ]\n" @@ -67,6 +68,7 @@ enum missing_action { MA_ERROR = 0, /* fail if any missing objects are encountered */ MA_ALLOW_ANY, /* silently allow ALL missing objects */ MA_PRINT, /* print ALL missing objects in special section */ + MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */ }; static enum missing_action arg_missing_action; @@ -197,6 +199,12 @@ static void finish_commit(struct commit *commit, void *data) static inline void finish_object__ma(struct object *obj) { + /* + * Whether or not we try to dynamically fetch missing objects + * from the server, we currently DO NOT have the object. We + * can either print, allow (ignore), or conditionally allow + * (ignore) them. + */ switch (arg_missing_action) { case MA_ERROR: die("missing blob object '%s'", oid_to_hex(&obj->oid)); @@ -209,25 +217,36 @@ static inline void finish_object__ma(struct object *obj) oidset_insert(&missing_objects, &obj->oid); return; + case MA_ALLOW_PROMISOR: + if (is_promisor_object(&obj->oid)) + return; + die("unexpected missing blob object '%s'", + oid_to_hex(&obj->oid)); + return; + default: BUG("unhandled missing_action"); return; } } -static void finish_object(struct object *obj, const char *name, void *cb_data) +static int finish_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; - if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) + if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) { finish_object__ma(obj); + return 1; + } if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT) parse_object(&obj->oid); + return 0; } static void show_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; - finish_object(obj, name, cb_data); + if (finish_object(obj, name, cb_data)) + return; display_progress(progress, ++progress_counter); if (info->flags & REV_LIST_QUIET) return; @@ -315,11 +334,19 @@ static inline int parse_missing_action_value(const char *value) if (!strcmp(value, "allow-any")) { arg_missing_action = MA_ALLOW_ANY; + fetch_if_missing = 0; return 1; } if (!strcmp(value, "print")) { arg_missing_action = MA_PRINT; + fetch_if_missing = 0; + return 1; + } + + if (!strcmp(value, "allow-promisor")) { + arg_missing_action = MA_ALLOW_PROMISOR; + fetch_if_missing = 0; return 1; } @@ -344,6 +371,35 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) init_revisions(&revs, prefix); revs.abbrev = DEFAULT_ABBREV; revs.commit_format = CMIT_FMT_UNSPECIFIED; + + /* + * Scan the argument list before invoking setup_revisions(), so that we + * know if fetch_if_missing needs to be set to 0. + * + * "--exclude-promisor-objects" acts as a pre-filter on missing objects + * by not crossing the boundary from realized objects to promisor + * objects. + * + * Let "--missing" to conditionally set fetch_if_missing. + */ + for (i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (!strcmp(arg, "--exclude-promisor-objects")) { + fetch_if_missing = 0; + revs.exclude_promisor_objects = 1; + break; + } + } + for (i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (skip_prefix(arg, "--missing=", &arg)) { + if (revs.exclude_promisor_objects) + die(_("cannot combine --exclude-promisor-objects and --missing")); + if (parse_missing_action_value(arg)) + break; + } + } + argc = setup_revisions(argc, argv, &revs, NULL); memset(&info, 0, sizeof(info)); @@ -412,9 +468,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) continue; } - if (skip_prefix(arg, "--missing=", &arg) && - parse_missing_action_value(arg)) - continue; + if (!strcmp(arg, "--exclude-promisor-objects")) + continue; /* already handled above */ + if (skip_prefix(arg, "--missing=", &arg)) + continue; /* already handled above */ usage(rev_list_usage); @@ -914,10 +914,12 @@ extern int grafts_replace_parents; #define GIT_REPO_VERSION 0 #define GIT_REPO_VERSION_READ 1 extern int repository_format_precious_objects; +extern char *repository_format_partial_clone; struct repository_format { int version; int precious_objects; + char *partial_clone; /* value of extensions.partialclone */ int is_bare; int hash_algo; char *work_tree; @@ -1648,7 +1650,8 @@ extern struct packed_git { unsigned pack_local:1, pack_keep:1, freshened:1, - do_not_close:1; + do_not_close:1, + pack_promisor:1; unsigned char sha1[20]; struct revindex_entry *revindex; /* something like ".git/objects/pack/xxxxx.pack" */ @@ -1787,6 +1790,14 @@ struct object_info { #define OBJECT_INFO_QUICK 8 extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); +/* + * Set this to 0 to prevent sha1_object_info_extended() from fetching missing + * blobs. This has a difference only if extensions.partialClone is set. + * + * Its default value is 1. + */ +extern int fetch_if_missing; + /* Dumb servers support */ extern int update_server_info(int); diff --git a/environment.c b/environment.c index 63ac38a46f..835bb75c4e 100644 --- a/environment.c +++ b/environment.c @@ -27,6 +27,7 @@ int warn_ambiguous_refs = 1; int warn_on_object_refname_ambiguity = 1; int ref_paranoia = -1; int repository_format_precious_objects; +char *repository_format_partial_clone; const char *git_commit_encoding; const char *git_log_output_encoding; const char *apply_default_whitespace; diff --git a/fetch-object.c b/fetch-object.c new file mode 100644 index 0000000000..258fcfac75 --- /dev/null +++ b/fetch-object.c @@ -0,0 +1,27 @@ +#include "cache.h" +#include "packfile.h" +#include "pkt-line.h" +#include "strbuf.h" +#include "transport.h" +#include "fetch-object.h" + +void fetch_object(const char *remote_name, const unsigned char *sha1) +{ + struct remote *remote; + struct transport *transport; + struct ref *ref; + int original_fetch_if_missing = fetch_if_missing; + + fetch_if_missing = 0; + remote = remote_get(remote_name); + if (!remote->url[0]) + die(_("Remote with no URL")); + transport = transport_get(remote, remote->url[0]); + + ref = alloc_ref(sha1_to_hex(sha1)); + hashcpy(ref->old_oid.hash, sha1); + transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); + transport_fetch_refs(transport, ref); + fetch_if_missing = original_fetch_if_missing; +} diff --git a/fetch-object.h b/fetch-object.h new file mode 100644 index 0000000000..f371300c88 --- /dev/null +++ b/fetch-object.h @@ -0,0 +1,6 @@ +#ifndef FETCH_OBJECT_H +#define FETCH_OBJECT_H + +extern void fetch_object(const char *remote_name, const unsigned char *sha1); + +#endif diff --git a/fetch-pack.c b/fetch-pack.c index 9f6b07ad91..7aa1f58995 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -450,6 +450,8 @@ static int find_common(struct fetch_pack_args *args, flushes = 0; retval = -1; + if (args->no_dependents) + goto done; while ((oid = get_rev())) { packet_buf_write(&req_buf, "have %s\n", oid_to_hex(oid)); print_verbose(args, "have %s", oid_to_hex(oid)); @@ -735,29 +737,31 @@ static int everything_local(struct fetch_pack_args *args, } } - if (!args->deepen) { - for_each_ref(mark_complete_oid, NULL); - for_each_cached_alternate(mark_alternate_complete); - commit_list_sort_by_date(&complete); - if (cutoff) - mark_recent_complete_commits(args, cutoff); - } + if (!args->no_dependents) { + if (!args->deepen) { + for_each_ref(mark_complete_oid, NULL); + for_each_cached_alternate(mark_alternate_complete); + commit_list_sort_by_date(&complete); + if (cutoff) + mark_recent_complete_commits(args, cutoff); + } - /* - * Mark all complete remote refs as common refs. - * Don't mark them common yet; the server has to be told so first. - */ - for (ref = *refs; ref; ref = ref->next) { - struct object *o = deref_tag(lookup_object(ref->old_oid.hash), - NULL, 0); + /* + * Mark all complete remote refs as common refs. + * Don't mark them common yet; the server has to be told so first. + */ + for (ref = *refs; ref; ref = ref->next) { + struct object *o = deref_tag(lookup_object(ref->old_oid.hash), + NULL, 0); - if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE)) - continue; + if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE)) + continue; - if (!(o->flags & SEEN)) { - rev_list_push((struct commit *)o, COMMON_REF | SEEN); + if (!(o->flags & SEEN)) { + rev_list_push((struct commit *)o, COMMON_REF | SEEN); - mark_common((struct commit *)o, 1, 1); + mark_common((struct commit *)o, 1, 1); + } } } @@ -833,7 +837,7 @@ static int get_pack(struct fetch_pack_args *args, argv_array_push(&cmd.args, alternate_shallow_file); } - if (do_keep) { + if (do_keep || args->from_promisor) { if (pack_lockfile) cmd.out = -1; cmd_name = "index-pack"; @@ -843,7 +847,7 @@ static int get_pack(struct fetch_pack_args *args, argv_array_push(&cmd.args, "-v"); if (args->use_thin_pack) argv_array_push(&cmd.args, "--fix-thin"); - if (args->lock_pack || unpack_limit) { + if (do_keep && (args->lock_pack || unpack_limit)) { char hostname[HOST_NAME_MAX + 1]; if (xgethostname(hostname, sizeof(hostname))) xsnprintf(hostname, sizeof(hostname), "localhost"); @@ -853,6 +857,8 @@ static int get_pack(struct fetch_pack_args *args, } if (args->check_self_contained_and_connected) argv_array_push(&cmd.args, "--check-self-contained-and-connected"); + if (args->from_promisor) + argv_array_push(&cmd.args, "--promisor"); } else { cmd_name = "unpack-objects"; diff --git a/fetch-pack.h b/fetch-pack.h index b6aeb43a8e..aeac152644 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -29,6 +29,14 @@ struct fetch_pack_args { unsigned cloning:1; unsigned update_shallow:1; unsigned deepen:1; + unsigned from_promisor:1; + + /* + * If 1, fetch_pack() will also not modify any object flags. + * This allows fetch_pack() to safely be called by any function, + * regardless of which object flags it uses (if any). + */ + unsigned no_dependents:1; }; /* diff --git a/list-objects.c b/list-objects.c index 0966cdc9fa..168bef688a 100644 --- a/list-objects.c +++ b/list-objects.c @@ -9,6 +9,7 @@ #include "list-objects.h" #include "list-objects-filter.h" #include "list-objects-filter-options.h" +#include "packfile.h" static void process_blob(struct rev_info *revs, struct blob *blob, @@ -30,6 +31,20 @@ static void process_blob(struct rev_info *revs, if (obj->flags & (UNINTERESTING | SEEN)) return; + /* + * Pre-filter known-missing objects when explicitly requested. + * Otherwise, a missing object error message may be reported + * later (depending on other filtering criteria). + * + * Note that this "--exclude-promisor-objects" pre-filtering + * may cause the actual filter to report an incomplete list + * of missing objects. + */ + if (revs->exclude_promisor_objects && + !has_object_file(&obj->oid) && + is_promisor_object(&obj->oid)) + return; + pathlen = path->len; strbuf_addstr(path, name); if (filter_fn) @@ -91,6 +106,8 @@ static void process_tree(struct rev_info *revs, all_entries_interesting: entry_not_interesting; int baselen = base->len; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; + int gently = revs->ignore_missing_links || + revs->exclude_promisor_objects; if (!revs->tree_objects) return; @@ -98,9 +115,19 @@ static void process_tree(struct rev_info *revs, die("bad tree object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - if (parse_tree_gently(tree, revs->ignore_missing_links) < 0) { + if (parse_tree_gently(tree, gently) < 0) { if (revs->ignore_missing_links) return; + + /* + * Pre-filter known-missing tree objects when explicitly + * requested. This may cause the actual filter to report + * an incomplete list of missing objects. + */ + if (revs->exclude_promisor_objects && + is_promisor_object(&obj->oid)) + return; + die("bad tree object %s", oid_to_hex(&obj->oid)); } @@ -252,7 +252,7 @@ struct object *parse_object(const struct object_id *oid) if (obj && obj->parsed) return obj; - if ((obj && obj->type == OBJ_BLOB) || + if ((obj && obj->type == OBJ_BLOB && has_object_file(oid)) || (!obj && has_object_file(oid) && sha1_object_info(oid->hash, NULL) == OBJ_BLOB)) { if (check_sha1_signature(repl, NULL, 0, NULL) < 0) { diff --git a/packfile.c b/packfile.c index 4a5fe7ab18..234797cde7 100644 --- a/packfile.c +++ b/packfile.c @@ -8,6 +8,11 @@ #include "list.h" #include "streaming.h" #include "sha1-lookup.h" +#include "commit.h" +#include "object.h" +#include "tag.h" +#include "tree-walk.h" +#include "tree.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -643,10 +648,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) return NULL; /* - * ".pack" is long enough to hold any suffix we're adding (and + * ".promisor" is long enough to hold any suffix we're adding (and * the use xsnprintf double-checks that) */ - alloc = st_add3(path_len, strlen(".pack"), 1); + alloc = st_add3(path_len, strlen(".promisor"), 1); p = alloc_packed_git(alloc); memcpy(p->pack_name, path, path_len); @@ -654,6 +659,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) if (!access(p->pack_name, F_OK)) p->pack_keep = 1; + xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor"); + if (!access(p->pack_name, F_OK)) + p->pack_promisor = 1; + xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack"); if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); @@ -781,7 +790,8 @@ static void prepare_packed_git_one(char *objdir, int local) if (ends_with(de->d_name, ".idx") || ends_with(de->d_name, ".pack") || ends_with(de->d_name, ".bitmap") || - ends_with(de->d_name, ".keep")) + ends_with(de->d_name, ".keep") || + ends_with(de->d_name, ".promisor")) string_list_append(&garbage, path.buf); else report_garbage(PACKDIR_FILE_GARBAGE, path.buf); @@ -1889,6 +1899,9 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) for (p = packed_git; p; p = p->next) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; + if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; if (open_pack_index(p)) { pack_errors = 1; continue; @@ -1899,3 +1912,61 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) } return r ? r : pack_errors; } + +static int add_promisor_object(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *set_) +{ + struct oidset *set = set_; + struct object *obj = parse_object(oid); + if (!obj) + return 1; + + oidset_insert(set, oid); + + /* + * If this is a tree, commit, or tag, the objects it refers + * to are also promisor objects. (Blobs refer to no objects.) + */ + if (obj->type == OBJ_TREE) { + struct tree *tree = (struct tree *)obj; + struct tree_desc desc; + struct name_entry entry; + if (init_tree_desc_gently(&desc, tree->buffer, tree->size)) + /* + * Error messages are given when packs are + * verified, so do not print any here. + */ + return 0; + while (tree_entry_gently(&desc, &entry)) + oidset_insert(set, entry.oid); + } else if (obj->type == OBJ_COMMIT) { + struct commit *commit = (struct commit *) obj; + struct commit_list *parents = commit->parents; + + oidset_insert(set, &commit->tree->object.oid); + for (; parents; parents = parents->next) + oidset_insert(set, &parents->item->object.oid); + } else if (obj->type == OBJ_TAG) { + struct tag *tag = (struct tag *) obj; + oidset_insert(set, &tag->tagged->oid); + } + return 0; +} + +int is_promisor_object(const struct object_id *oid) +{ + static struct oidset promisor_objects; + static int promisor_objects_prepared; + + if (!promisor_objects_prepared) { + if (repository_format_partial_clone) { + for_each_packed_object(add_promisor_object, + &promisor_objects, + FOR_EACH_OBJECT_PROMISOR_ONLY); + } + promisor_objects_prepared = 1; + } + return oidset_contains(&promisor_objects, oid); +} diff --git a/packfile.h b/packfile.h index 0cdeb54dcd..a7fca598d6 100644 --- a/packfile.h +++ b/packfile.h @@ -1,6 +1,8 @@ #ifndef PACKFILE_H #define PACKFILE_H +#include "oidset.h" + /* * Generate the filename to be used for a pack file with checksum "sha1" and * extension "ext". The result is written into the strbuf "buf", overwriting @@ -125,6 +127,11 @@ extern int has_sha1_pack(const unsigned char *sha1); extern int has_pack_index(const unsigned char *sha1); /* + * Only iterate over packs obtained from the promisor remote. + */ +#define FOR_EACH_OBJECT_PROMISOR_ONLY 2 + +/* * Iterate over packed objects in both the local * repository and any alternates repositories (unless the * FOR_EACH_OBJECT_LOCAL_ONLY flag, defined in cache.h, is set). @@ -135,4 +142,10 @@ typedef int each_packed_object_fn(const struct object_id *oid, void *data); extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags); +/* + * Return 1 if an object in a promisor packfile is or refers to the given + * object, 0 otherwise. + */ +extern int is_promisor_object(const struct object_id *oid); + #endif diff --git a/remote-curl.c b/remote-curl.c index 0053b09549..431839111a 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -33,7 +33,9 @@ struct options { thin : 1, /* One of the SEND_PACK_PUSH_CERT_* constants. */ push_cert : 2, - deepen_relative : 1; + deepen_relative : 1, + from_promisor : 1, + no_dependents : 1; }; static struct options options; static struct string_list cas_options = STRING_LIST_INIT_DUP; @@ -157,6 +159,12 @@ static int set_option(const char *name, const char *value) return -1; return 0; #endif /* LIBCURL_VERSION_NUM >= 0x070a08 */ + } else if (!strcmp(name, "from-promisor")) { + options.from_promisor = 1; + return 0; + } else if (!strcmp(name, "no-dependents")) { + options.no_dependents = 1; + return 0; } else { return 1 /* unsupported */; } @@ -822,6 +830,10 @@ static int fetch_git(struct discovery *heads, options.deepen_not.items[i].string); if (options.deepen_relative && options.depth) argv_array_push(&args, "--deepen-relative"); + if (options.from_promisor) + argv_array_push(&args, "--from-promisor"); + if (options.no_dependents) + argv_array_push(&args, "--no-dependents"); argv_array_push(&args, url.buf); for (i = 0; i < nr_heads; i++) { diff --git a/revision.c b/revision.c index a60628fbff..236d0d579f 100644 --- a/revision.c +++ b/revision.c @@ -198,6 +198,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name, if (!object) { if (revs->ignore_missing) return object; + if (revs->exclude_promisor_objects && is_promisor_object(oid)) + return NULL; die("bad object %s", name); } object->flags |= flags; @@ -799,9 +801,17 @@ static int add_parents_to_list(struct rev_info *revs, struct commit *commit, for (parent = commit->parents; parent; parent = parent->next) { struct commit *p = parent->item; - - if (parse_commit_gently(p, revs->ignore_missing_links) < 0) + int gently = revs->ignore_missing_links || + revs->exclude_promisor_objects; + if (parse_commit_gently(p, gently) < 0) { + if (revs->exclude_promisor_objects && + is_promisor_object(&p->object.oid)) { + if (revs->first_parent_only) + break; + continue; + } return -1; + } if (revs->show_source && !p->util) p->util = commit->util; p->object.flags |= left_flag; @@ -2100,6 +2110,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->limited = 1; } else if (!strcmp(arg, "--ignore-missing")) { revs->ignore_missing = 1; + } else if (!strcmp(arg, "--exclude-promisor-objects")) { + if (fetch_if_missing) + die("BUG: exclude_promisor_objects can only be used when fetch_if_missing is 0"); + revs->exclude_promisor_objects = 1; } else { int opts = diff_opt_parse(&revs->diffopt, argv, argc, revs->prefix); if (!opts) @@ -2845,6 +2859,16 @@ void reset_revision_walk(void) clear_object_flags(SEEN | ADDED | SHOWN); } +static int mark_uninteresting(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *unused) +{ + struct object *o = parse_object(oid); + o->flags |= UNINTERESTING | SEEN; + return 0; +} + int prepare_revision_walk(struct rev_info *revs) { int i; @@ -2872,6 +2896,11 @@ int prepare_revision_walk(struct rev_info *revs) (revs->limited && limiting_can_increase_treesame(revs))) revs->treesame.name = "treesame"; + if (revs->exclude_promisor_objects) { + for_each_packed_object(mark_uninteresting, NULL, + FOR_EACH_OBJECT_PROMISOR_ONLY); + } + if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED) commit_list_sort_by_date(&revs->commits); if (revs->no_walk) diff --git a/revision.h b/revision.h index d7a35c8c9e..3dee97bfb9 100644 --- a/revision.h +++ b/revision.h @@ -122,7 +122,10 @@ struct rev_info { ancestry_path:1, first_parent_only:1, line_level_traverse:1, - tree_blobs_in_commit_order:1; + tree_blobs_in_commit_order:1, + + /* for internal use only */ + exclude_promisor_objects:1; /* Diff flags */ unsigned int diff:1, @@ -422,7 +422,11 @@ static int check_repo_format(const char *var, const char *value, void *vdata) ; else if (!strcmp(ext, "preciousobjects")) data->precious_objects = git_config_bool(var, value); - else + else if (!strcmp(ext, "partialclone")) { + if (!value) + return config_error_nonbool(var); + data->partial_clone = xstrdup(value); + } else string_list_append(&data->unknown_extensions, ext); } else if (strcmp(var, "core.bare") == 0) { data->is_bare = git_config_bool(var, value); @@ -464,6 +468,7 @@ static int check_repository_format_gently(const char *gitdir, struct repository_ } repository_format_precious_objects = candidate->precious_objects; + repository_format_partial_clone = candidate->partial_clone; string_list_clear(&candidate->unknown_extensions, 0); if (!has_common) { if (candidate->is_bare != -1) { diff --git a/sha1_file.c b/sha1_file.c index 3da70ac650..2e58f5560a 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -29,6 +29,7 @@ #include "mergesort.h" #include "quote.h" #include "packfile.h" +#include "fetch-object.h" const unsigned char null_sha1[GIT_MAX_RAWSZ]; const struct object_id null_oid; @@ -1213,6 +1214,8 @@ static int sha1_loose_object_info(const unsigned char *sha1, return (status < 0) ? status : 0; } +int fetch_if_missing = 1; + int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags) { static struct object_info blank_oi = OBJECT_INFO_INIT; @@ -1221,6 +1224,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ? lookup_replace_object(sha1) : sha1; + int already_retried = 0; if (is_null_sha1(real)) return -1; @@ -1248,19 +1252,32 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, } } - if (!find_pack_entry(real, &e)) { + while (1) { + if (find_pack_entry(real, &e)) + break; + /* Most likely it's a loose object. */ if (!sha1_loose_object_info(real, oi, flags)) return 0; /* Not a loose object; someone else may have just packed it. */ - if (flags & OBJECT_INFO_QUICK) { - return -1; - } else { - reprepare_packed_git(); - if (!find_pack_entry(real, &e)) - return -1; + reprepare_packed_git(); + if (find_pack_entry(real, &e)) + break; + + /* Check if it is a missing object */ + if (fetch_if_missing && repository_format_partial_clone && + !already_retried) { + /* + * TODO Investigate haveing fetch_object() return + * TODO error/success and stopping the music here. + */ + fetch_object(repository_format_partial_clone, real); + already_retried = 1; + continue; } + + return -1; } if (oi == &blank_oi) @@ -1269,7 +1286,6 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, * information below, so return early. */ return 0; - rtype = packed_object_info(e.p, e.offset, oi); if (rtype < 0) { mark_bad_packed_object(e.p, real); diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh new file mode 100755 index 0000000000..cc18b75c03 --- /dev/null +++ b/t/t0410-partial-clone.sh @@ -0,0 +1,343 @@ +#!/bin/sh + +test_description='partial clone' + +. ./test-lib.sh + +delete_object () { + rm $1/.git/objects/$(echo $2 | sed -e 's|^..|&/|') +} + +pack_as_from_promisor () { + HASH=$(git -C repo pack-objects .git/objects/pack/pack) && + >repo/.git/objects/pack/pack-$HASH.promisor && + echo $HASH +} + +promise_and_delete () { + HASH=$(git -C repo rev-parse "$1") && + git -C repo tag -a -m message my_annotated_tag "$HASH" && + git -C repo rev-parse my_annotated_tag | pack_as_from_promisor && + # tag -d prints a message to stdout, so redirect it + git -C repo tag -d my_annotated_tag >/dev/null && + delete_object repo "$HASH" +} + +test_expect_success 'missing reflog object, but promised by a commit, passes fsck' ' + test_create_repo repo && + test_commit -C repo my_commit && + + A=$(git -C repo commit-tree -m a HEAD^{tree}) && + C=$(git -C repo commit-tree -m c -p $A HEAD^{tree}) && + + # Reference $A only from reflog, and delete it + git -C repo branch my_branch "$A" && + git -C repo branch -f my_branch my_commit && + delete_object repo "$A" && + + # State that we got $C, which refers to $A, from promisor + printf "$C\n" | pack_as_from_promisor && + + # Normally, it fails + test_must_fail git -C repo fsck && + + # But with the extension, it succeeds + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo fsck +' + +test_expect_success 'missing reflog object, but promised by a tag, passes fsck' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo my_commit && + + A=$(git -C repo commit-tree -m a HEAD^{tree}) && + git -C repo tag -a -m d my_tag_name $A && + T=$(git -C repo rev-parse my_tag_name) && + git -C repo tag -d my_tag_name && + + # Reference $A only from reflog, and delete it + git -C repo branch my_branch "$A" && + git -C repo branch -f my_branch my_commit && + delete_object repo "$A" && + + # State that we got $T, which refers to $A, from promisor + printf "$T\n" | pack_as_from_promisor && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo fsck +' + +test_expect_success 'missing reflog object alone fails fsck, even with extension set' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo my_commit && + + A=$(git -C repo commit-tree -m a HEAD^{tree}) && + B=$(git -C repo commit-tree -m b HEAD^{tree}) && + + # Reference $A only from reflog, and delete it + git -C repo branch my_branch "$A" && + git -C repo branch -f my_branch my_commit && + delete_object repo "$A" && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + test_must_fail git -C repo fsck +' + +test_expect_success 'missing ref object, but promised, passes fsck' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo my_commit && + + A=$(git -C repo commit-tree -m a HEAD^{tree}) && + + # Reference $A only from ref + git -C repo branch my_branch "$A" && + promise_and_delete "$A" && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo fsck +' + +test_expect_success 'missing object, but promised, passes fsck' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo 1 && + test_commit -C repo 2 && + test_commit -C repo 3 && + git -C repo tag -a annotated_tag -m "annotated tag" && + + C=$(git -C repo rev-parse 1) && + T=$(git -C repo rev-parse 2^{tree}) && + B=$(git hash-object repo/3.t) && + AT=$(git -C repo rev-parse annotated_tag) && + + promise_and_delete "$C" && + promise_and_delete "$T" && + promise_and_delete "$B" && + promise_and_delete "$AT" && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo fsck +' + +test_expect_success 'missing CLI object, but promised, passes fsck' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo my_commit && + + A=$(git -C repo commit-tree -m a HEAD^{tree}) && + promise_and_delete "$A" && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo fsck "$A" +' + +test_expect_success 'fetching of missing objects' ' + rm -rf repo && + test_create_repo server && + test_commit -C server foo && + git -C server repack -a -d --write-bitmap-index && + + git clone "file://$(pwd)/server" repo && + HASH=$(git -C repo rev-parse foo) && + rm -rf repo/.git/objects/* && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "origin" && + git -C repo cat-file -p "$HASH" && + + # Ensure that the .promisor file is written, and check that its + # associated packfile contains the object + ls repo/.git/objects/pack/pack-*.promisor >promisorlist && + test_line_count = 1 promisorlist && + IDX=$(cat promisorlist | sed "s/promisor$/idx/") && + git verify-pack --verbose "$IDX" | grep "$HASH" +' + +test_expect_success 'rev-list stops traversal at missing and promised commit' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo foo && + test_commit -C repo bar && + + FOO=$(git -C repo rev-parse foo) && + promise_and_delete "$FOO" && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo rev-list --exclude-promisor-objects --objects bar >out && + grep $(git -C repo rev-parse bar) out && + ! grep $FOO out +' + +test_expect_success 'rev-list stops traversal at missing and promised tree' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo foo && + mkdir repo/a_dir && + echo something >repo/a_dir/something && + git -C repo add a_dir/something && + git -C repo commit -m bar && + + # foo^{tree} (tree referenced from commit) + TREE=$(git -C repo rev-parse foo^{tree}) && + + # a tree referenced by HEAD^{tree} (tree referenced from tree) + TREE2=$(git -C repo ls-tree HEAD^{tree} | grep " tree " | head -1 | cut -b13-52) && + + promise_and_delete "$TREE" && + promise_and_delete "$TREE2" && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo rev-list --exclude-promisor-objects --objects HEAD >out && + grep $(git -C repo rev-parse foo) out && + ! grep $TREE out && + grep $(git -C repo rev-parse HEAD) out && + ! grep $TREE2 out +' + +test_expect_success 'rev-list stops traversal at missing and promised blob' ' + rm -rf repo && + test_create_repo repo && + echo something >repo/something && + git -C repo add something && + git -C repo commit -m foo && + + BLOB=$(git -C repo hash-object -w something) && + promise_and_delete "$BLOB" && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo rev-list --exclude-promisor-objects --objects HEAD >out && + grep $(git -C repo rev-parse HEAD) out && + ! grep $BLOB out +' + +test_expect_success 'rev-list stops traversal at promisor commit, tree, and blob' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo foo && + test_commit -C repo bar && + test_commit -C repo baz && + + COMMIT=$(git -C repo rev-parse foo) && + TREE=$(git -C repo rev-parse bar^{tree}) && + BLOB=$(git hash-object repo/baz.t) && + printf "%s\n%s\n%s\n" $COMMIT $TREE $BLOB | pack_as_from_promisor && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo rev-list --exclude-promisor-objects --objects HEAD >out && + ! grep $COMMIT out && + ! grep $TREE out && + ! grep $BLOB out && + grep $(git -C repo rev-parse bar) out # sanity check that some walking was done +' + +test_expect_success 'rev-list accepts missing and promised objects on command line' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo foo && + test_commit -C repo bar && + test_commit -C repo baz && + + COMMIT=$(git -C repo rev-parse foo) && + TREE=$(git -C repo rev-parse bar^{tree}) && + BLOB=$(git hash-object repo/baz.t) && + + promise_and_delete $COMMIT && + promise_and_delete $TREE && + promise_and_delete $BLOB && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo rev-list --exclude-promisor-objects --objects "$COMMIT" "$TREE" "$BLOB" +' + +test_expect_success 'gc does not repack promisor objects' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo my_commit && + + TREE_HASH=$(git -C repo rev-parse HEAD^{tree}) && + HASH=$(printf "$TREE_HASH\n" | pack_as_from_promisor) && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo gc && + + # Ensure that the promisor packfile still exists, and remove it + test -e repo/.git/objects/pack/pack-$HASH.pack && + rm repo/.git/objects/pack/pack-$HASH.* && + + # Ensure that the single other pack contains the commit, but not the tree + ls repo/.git/objects/pack/pack-*.pack >packlist && + test_line_count = 1 packlist && + git verify-pack repo/.git/objects/pack/pack-*.pack -v >out && + grep "$(git -C repo rev-parse HEAD)" out && + ! grep "$TREE_HASH" out +' + +test_expect_success 'gc stops traversal when a missing but promised object is reached' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo my_commit && + + TREE_HASH=$(git -C repo rev-parse HEAD^{tree}) && + HASH=$(promise_and_delete $TREE_HASH) && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + git -C repo gc && + + # Ensure that the promisor packfile still exists, and remove it + test -e repo/.git/objects/pack/pack-$HASH.pack && + rm repo/.git/objects/pack/pack-$HASH.* && + + # Ensure that the single other pack contains the commit, but not the tree + ls repo/.git/objects/pack/pack-*.pack >packlist && + test_line_count = 1 packlist && + git verify-pack repo/.git/objects/pack/pack-*.pack -v >out && + grep "$(git -C repo rev-parse HEAD)" out && + ! grep "$TREE_HASH" out +' + +LIB_HTTPD_PORT=12345 # default port, 410, cannot be used as non-root +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'fetching of missing objects from an HTTP server' ' + rm -rf repo && + SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" && + test_create_repo "$SERVER" && + test_commit -C "$SERVER" foo && + git -C "$SERVER" repack -a -d --write-bitmap-index && + + git clone $HTTPD_URL/smart/server repo && + HASH=$(git -C repo rev-parse foo) && + rm -rf repo/.git/objects/* && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "origin" && + git -C repo cat-file -p "$HASH" && + + # Ensure that the .promisor file is written, and check that its + # associated packfile contains the object + ls repo/.git/objects/pack/pack-*.promisor >promisorlist && + test_line_count = 1 promisorlist && + IDX=$(cat promisorlist | sed "s/promisor$/idx/") && + git verify-pack --verbose "$IDX" | grep "$HASH" +' + +stop_httpd + +test_done diff --git a/transport.c b/transport.c index fc802260f6..e82db773fd 100644 --- a/transport.c +++ b/transport.c @@ -161,6 +161,12 @@ static int set_git_option(struct git_transport_options *opts, } else if (!strcmp(name, TRANS_OPT_DEEPEN_RELATIVE)) { opts->deepen_relative = !!value; return 0; + } else if (!strcmp(name, TRANS_OPT_FROM_PROMISOR)) { + opts->from_promisor = !!value; + return 0; + } else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) { + opts->no_dependents = !!value; + return 0; } return 1; } @@ -229,6 +235,8 @@ static int fetch_refs_via_pack(struct transport *transport, data->options.check_self_contained_and_connected; args.cloning = transport->cloning; args.update_shallow = data->options.update_shallow; + args.from_promisor = data->options.from_promisor; + args.no_dependents = data->options.no_dependents; if (!data->got_remote_heads) { connect_setup(transport, 0); diff --git a/transport.h b/transport.h index 731c78b679..8c3430a5b9 100644 --- a/transport.h +++ b/transport.h @@ -15,6 +15,8 @@ struct git_transport_options { unsigned self_contained_and_connected : 1; unsigned update_shallow : 1; unsigned deepen_relative : 1; + unsigned from_promisor : 1; + unsigned no_dependents : 1; int depth; const char *deepen_since; const struct string_list *deepen_not; @@ -159,6 +161,15 @@ void transport_check_allowed(const char *type); /* Send push certificates */ #define TRANS_OPT_PUSH_CERT "pushcert" +/* Indicate that these objects are being fetched by a promisor */ +#define TRANS_OPT_FROM_PROMISOR "from-promisor" + +/* + * Indicate that only the objects wanted need to be fetched, not their + * dependents + */ +#define TRANS_OPT_NO_DEPENDENTS "no-dependents" + /** * Returns 0 if the option was used, non-zero otherwise. Prints a * message to stderr if the option is not used. |