diff options
-rw-r--r-- | Documentation/rev-list-options.txt | 9 | ||||
-rw-r--r-- | builtin/count-objects.c | 101 | ||||
-rw-r--r-- | builtin/grep.c | 8 | ||||
-rw-r--r-- | builtin/pack-objects.c | 86 | ||||
-rw-r--r-- | builtin/prune-packed.c | 69 | ||||
-rw-r--r-- | builtin/prune.c | 89 | ||||
-rw-r--r-- | builtin/reflog.c | 2 | ||||
-rw-r--r-- | builtin/repack.c | 1 | ||||
-rw-r--r-- | cache.h | 46 | ||||
-rw-r--r-- | git-compat-util.h | 2 | ||||
-rw-r--r-- | list-objects.c | 14 | ||||
-rw-r--r-- | object.c | 48 | ||||
-rw-r--r-- | object.h | 11 | ||||
-rw-r--r-- | reachable.c | 263 | ||||
-rw-r--r-- | reachable.h | 5 | ||||
-rw-r--r-- | revision.c | 113 | ||||
-rw-r--r-- | revision.h | 7 | ||||
-rw-r--r-- | sha1_file.c | 209 | ||||
-rwxr-xr-x | t/t5516-fetch-push.sh | 13 | ||||
-rwxr-xr-x | t/t6000-rev-list-misc.sh | 23 | ||||
-rwxr-xr-x | t/t6501-freshen-objects.sh | 132 | ||||
-rwxr-xr-x | t/t7701-repack-unpack-unreachable.sh | 13 | ||||
-rw-r--r-- | urlmatch.c | 8 |
23 files changed, 836 insertions, 436 deletions
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 5d311b8d46..3301fdebf0 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -168,6 +168,15 @@ respectively, and they must begin with `refs/` when applied to `--glob` or `--all`. If a trailing '/{asterisk}' is intended, it must be given explicitly. +--reflog:: + Pretend as if all objects mentioned by reflogs are listed on the + command line as `<commit>`. + +--indexed-objects:: + Pretend as if all trees and blobs used by the index are listed + on the command line. Note that you probably want to use + `--objects`, too. + --ignore-missing:: Upon seeing an invalid object name in the input, pretend as if the bad input was not given. diff --git a/builtin/count-objects.c b/builtin/count-objects.c index a7f70cb858..e47ef0b1af 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -11,6 +11,9 @@ static unsigned long garbage; static off_t size_garbage; +static int verbose; +static unsigned long loose, packed, packed_loose; +static off_t loose_size; static void real_report_garbage(const char *desc, const char *path) { @@ -21,61 +24,31 @@ static void real_report_garbage(const char *desc, const char *path) garbage++; } -static void count_objects(DIR *d, char *path, int len, int verbose, - unsigned long *loose, - off_t *loose_size, - unsigned long *packed_loose) +static void loose_garbage(const char *path) { - struct dirent *ent; - while ((ent = readdir(d)) != NULL) { - char hex[41]; - unsigned char sha1[20]; - const char *cp; - int bad = 0; + if (verbose) + report_garbage("garbage found", path); +} - if (is_dot_or_dotdot(ent->d_name)) - continue; - for (cp = ent->d_name; *cp; cp++) { - int ch = *cp; - if (('0' <= ch && ch <= '9') || - ('a' <= ch && ch <= 'f')) - continue; - bad = 1; - break; - } - if (cp - ent->d_name != 38) - bad = 1; - else { - struct stat st; - memcpy(path + len + 3, ent->d_name, 38); - path[len + 2] = '/'; - path[len + 41] = 0; - if (lstat(path, &st) || !S_ISREG(st.st_mode)) - bad = 1; - else - (*loose_size) += xsize_t(on_disk_bytes(st)); - } - if (bad) { - if (verbose) { - struct strbuf sb = STRBUF_INIT; - strbuf_addf(&sb, "%.*s/%s", - len + 2, path, ent->d_name); - report_garbage("garbage found", sb.buf); - strbuf_release(&sb); - } - continue; - } - (*loose)++; - if (!verbose) - continue; - memcpy(hex, path+len, 2); - memcpy(hex+2, ent->d_name, 38); - hex[40] = 0; - if (get_sha1_hex(hex, sha1)) - die("internal error"); - if (has_sha1_pack(sha1)) - (*packed_loose)++; +static int count_loose(const unsigned char *sha1, const char *path, void *data) +{ + struct stat st; + + if (lstat(path, &st) || !S_ISREG(st.st_mode)) + loose_garbage(path); + else { + loose_size += on_disk_bytes(st); + loose++; + if (verbose && has_sha1_pack(sha1)) + packed_loose++; } + return 0; +} + +static int count_cruft(const char *basename, const char *path, void *data) +{ + loose_garbage(path); + return 0; } static char const * const count_objects_usage[] = { @@ -85,12 +58,7 @@ static char const * const count_objects_usage[] = { int cmd_count_objects(int argc, const char **argv, const char *prefix) { - int i, verbose = 0, human_readable = 0; - const char *objdir = get_object_directory(); - int len = strlen(objdir); - char *path = xmalloc(len + 50); - unsigned long loose = 0, packed = 0, packed_loose = 0; - off_t loose_size = 0; + int human_readable = 0; struct option opts[] = { OPT__VERBOSE(&verbose, N_("be verbose")), OPT_BOOL('H', "human-readable", &human_readable, @@ -104,19 +72,10 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) usage_with_options(count_objects_usage, opts); if (verbose) report_garbage = real_report_garbage; - memcpy(path, objdir, len); - if (len && objdir[len-1] != '/') - path[len++] = '/'; - for (i = 0; i < 256; i++) { - DIR *d; - sprintf(path + len, "%02x", i); - d = opendir(path); - if (!d) - continue; - count_objects(d, path, len, verbose, - &loose, &loose_size, &packed_loose); - closedir(d); - } + + for_each_loose_file_in_objdir(get_object_directory(), + count_loose, count_cruft, NULL, NULL); + if (verbose) { struct packed_git *p; unsigned long num_pack = 0; diff --git a/builtin/grep.c b/builtin/grep.c index c86a142f30..4063882f06 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -456,10 +456,10 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, } static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, - struct object *obj, const char *name, struct object_context *oc) + struct object *obj, const char *name, const char *path) { if (obj->type == OBJ_BLOB) - return grep_sha1(opt, obj->sha1, name, 0, oc ? oc->path : NULL); + return grep_sha1(opt, obj->sha1, name, 0, path); if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) { struct tree_desc tree; void *data; @@ -501,7 +501,7 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec, for (i = 0; i < nr; i++) { struct object *real_obj; real_obj = deref_tag(list->objects[i].item, NULL, 0); - if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].context)) { + if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].path)) { hit = 1; if (opt->status_only) break; @@ -821,7 +821,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) struct object *object = parse_object_or_die(sha1, arg); if (!seen_dashdash) verify_non_filename(prefix, arg); - add_object_array_with_context(object, arg, &list, xmemdupz(&oc, sizeof(struct object_context))); + add_object_array_with_path(object, arg, &list, oc.mode, oc.path); continue; } if (!strcmp(arg, "--")) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 78c659a6b4..3f9f5c7760 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -20,6 +20,9 @@ #include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" +#include "reachable.h" +#include "sha1-array.h" +#include "argv-array.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"), @@ -2406,6 +2409,27 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) return 0; } +/* + * Store a list of sha1s that are should not be discarded + * because they are either written too recently, or are + * reachable from another object that was. + * + * This is filled by get_object_list. + */ +static struct sha1_array recent_objects; + +static int loosened_object_can_be_discarded(const unsigned char *sha1, + unsigned long mtime) +{ + if (!unpack_unreachable_expiration) + return 0; + if (mtime > unpack_unreachable_expiration) + return 0; + if (sha1_array_lookup(&recent_objects, sha1) >= 0) + return 0; + return 1; +} + static void loosen_unused_packed_objects(struct rev_info *revs) { struct packed_git *p; @@ -2416,17 +2440,14 @@ static void loosen_unused_packed_objects(struct rev_info *revs) if (!p->pack_local || p->pack_keep) continue; - if (unpack_unreachable_expiration && - p->mtime < unpack_unreachable_expiration) - continue; - if (open_pack_index(p)) die("cannot open pack index"); for (i = 0; i < p->num_objects; i++) { sha1 = nth_packed_object_sha1(p, i); if (!packlist_find(&to_pack, sha1, NULL) && - !has_sha1_pack_kept_or_nonlocal(sha1)) + !has_sha1_pack_kept_or_nonlocal(sha1) && + !loosened_object_can_be_discarded(sha1, p->mtime)) if (force_object_loose(sha1, p->mtime)) die("unable to force loose object"); } @@ -2462,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs) return 0; } +static void record_recent_object(struct object *obj, + const struct name_path *path, + const char *last, + void *data) +{ + sha1_array_append(&recent_objects, obj->sha1); +} + +static void record_recent_commit(struct commit *commit, void *data) +{ + sha1_array_append(&recent_objects, commit->object.sha1); +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; @@ -2509,10 +2543,23 @@ static void get_object_list(int ac, const char **av) mark_edges_uninteresting(&revs, show_edge); traverse_commit_list(&revs, show_commit, show_object, NULL); + if (unpack_unreachable_expiration) { + revs.ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(&revs, + unpack_unreachable_expiration)) + die("unable to add recent objects"); + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + traverse_commit_list(&revs, record_recent_commit, + record_recent_object, NULL); + } + if (keep_unreachable) add_objects_in_unpacked_packs(&revs); if (unpack_unreachable) loosen_unused_packed_objects(&revs); + + sha1_array_clear(&recent_objects); } static int option_parse_index_version(const struct option *opt, @@ -2567,9 +2614,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) int use_internal_rev_list = 0; int thin = 0; int all_progress_implied = 0; - const char *rp_av[6]; - int rp_ac = 0; + struct argv_array rp = ARGV_ARRAY_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; + int rev_list_index = 0; struct option pack_objects_options[] = { OPT_SET_INT('q', "quiet", &progress, N_("do not show progress meter"), 0), @@ -2616,6 +2663,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL, N_("include objects referred by reflog entries"), PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, + { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL, + N_("include objects referred to by the index"), + PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, OPT_BOOL(0, "stdout", &pack_to_stdout, N_("output pack to stdout")), OPT_BOOL(0, "include-tag", &include_tag, @@ -2658,24 +2708,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (pack_to_stdout != !base_name || argc) usage_with_options(pack_usage, pack_objects_options); - rp_av[rp_ac++] = "pack-objects"; + argv_array_push(&rp, "pack-objects"); if (thin) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--objects-edge"; + argv_array_push(&rp, "--objects-edge"); } else - rp_av[rp_ac++] = "--objects"; + argv_array_push(&rp, "--objects"); if (rev_list_all) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--all"; + argv_array_push(&rp, "--all"); } if (rev_list_reflog) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--reflog"; + argv_array_push(&rp, "--reflog"); + } + if (rev_list_index) { + use_internal_rev_list = 1; + argv_array_push(&rp, "--indexed-objects"); } if (rev_list_unpacked) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--unpacked"; + argv_array_push(&rp, "--unpacked"); } if (!reuse_object) @@ -2706,6 +2760,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (keep_unreachable && unpack_unreachable) die("--keep-unreachable and --unpack-unreachable are incompatible."); + if (!rev_list_all || !rev_list_reflog || !rev_list_index) + unpack_unreachable_expiration = 0; if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow()) use_bitmap_index = 0; @@ -2723,8 +2779,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!use_internal_rev_list) read_object_list_from_stdin(); else { - rp_av[rp_ac] = NULL; - get_object_list(rp_ac, rp_av); + get_object_list(rp.argc, rp.argv); + argv_array_clear(&rp); } cleanup_preferred_base(); if (include_tag && nr_result) diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index d430731d70..f24a2c2bdc 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -10,65 +10,42 @@ static const char * const prune_packed_usage[] = { static struct progress *progress; -static void prune_dir(int i, DIR *dir, struct strbuf *pathname, int opts) +static int prune_subdir(int nr, const char *path, void *data) { - struct dirent *de; - char hex[40]; - int top_len = pathname->len; + int *opts = data; + display_progress(progress, nr + 1); + if (!(*opts & PRUNE_PACKED_DRY_RUN)) + rmdir(path); + return 0; +} + +static int prune_object(const unsigned char *sha1, const char *path, + void *data) +{ + int *opts = data; - sprintf(hex, "%02x", i); - while ((de = readdir(dir)) != NULL) { - unsigned char sha1[20]; - if (strlen(de->d_name) != 38) - continue; - memcpy(hex + 2, de->d_name, 38); - if (get_sha1_hex(hex, sha1)) - continue; - if (!has_sha1_pack(sha1)) - continue; + if (!has_sha1_pack(sha1)) + return 0; - strbuf_add(pathname, de->d_name, 38); - if (opts & PRUNE_PACKED_DRY_RUN) - printf("rm -f %s\n", pathname->buf); - else - unlink_or_warn(pathname->buf); - display_progress(progress, i + 1); - strbuf_setlen(pathname, top_len); - } + if (*opts & PRUNE_PACKED_DRY_RUN) + printf("rm -f %s\n", path); + else + unlink_or_warn(path); + return 0; } void prune_packed_objects(int opts) { - int i; - const char *dir = get_object_directory(); - struct strbuf pathname = STRBUF_INIT; - int top_len; - - strbuf_addstr(&pathname, dir); if (opts & PRUNE_PACKED_VERBOSE) progress = start_progress_delay(_("Removing duplicate objects"), 256, 95, 2); - if (pathname.len && pathname.buf[pathname.len - 1] != '/') - strbuf_addch(&pathname, '/'); - - top_len = pathname.len; - for (i = 0; i < 256; i++) { - DIR *d; + for_each_loose_file_in_objdir(get_object_directory(), + prune_object, NULL, prune_subdir, &opts); - display_progress(progress, i + 1); - strbuf_setlen(&pathname, top_len); - strbuf_addf(&pathname, "%02x/", i); - d = opendir(pathname.buf); - if (!d) - continue; - prune_dir(i, d, &pathname, opts); - closedir(d); - strbuf_setlen(&pathname, top_len + 2); - rmdir(pathname.buf); - } + /* Ensure we show 100% before finishing progress */ + display_progress(progress, 256); stop_progress(&progress); - strbuf_release(&pathname); } int cmd_prune_packed(int argc, const char **argv, const char *prefix) diff --git a/builtin/prune.c b/builtin/prune.c index 144a3bdb33..04d3b12ae4 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath) return 0; } -static int prune_object(const char *fullpath, const unsigned char *sha1) +static int prune_object(const unsigned char *sha1, const char *fullpath, + void *data) { struct stat st; - if (lstat(fullpath, &st)) - return error("Could not stat '%s'", fullpath); + + /* + * Do we know about this object? + * It must have been reachable + */ + if (lookup_object(sha1)) + return 0; + + if (lstat(fullpath, &st)) { + /* report errors, but do not stop pruning */ + error("Could not stat '%s'", fullpath); + return 0; + } if (st.st_mtime > expire) return 0; if (show_only || verbose) { @@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1) return 0; } -static int prune_dir(int i, struct strbuf *path) +static int prune_cruft(const char *basename, const char *path, void *data) { - size_t baselen = path->len; - DIR *dir = opendir(path->buf); - struct dirent *de; - - if (!dir) - return 0; - - while ((de = readdir(dir)) != NULL) { - char name[100]; - unsigned char sha1[20]; - - if (is_dot_or_dotdot(de->d_name)) - continue; - if (strlen(de->d_name) == 38) { - sprintf(name, "%02x", i); - memcpy(name+2, de->d_name, 39); - if (get_sha1_hex(name, sha1) < 0) - break; - - /* - * Do we know about this object? - * It must have been reachable - */ - if (lookup_object(sha1)) - continue; - - strbuf_addf(path, "/%s", de->d_name); - prune_object(path->buf, sha1); - strbuf_setlen(path, baselen); - continue; - } - if (starts_with(de->d_name, "tmp_obj_")) { - strbuf_addf(path, "/%s", de->d_name); - prune_tmp_file(path->buf); - strbuf_setlen(path, baselen); - continue; - } - fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name); - } - closedir(dir); - if (!show_only) - rmdir(path->buf); + if (starts_with(basename, "tmp_obj_")) + prune_tmp_file(path); + else + fprintf(stderr, "bad sha1 file: %s\n", path); return 0; } -static void prune_object_dir(const char *path) +static int prune_subdir(int nr, const char *path, void *data) { - struct strbuf buf = STRBUF_INIT; - size_t baselen; - int i; - - strbuf_addstr(&buf, path); - strbuf_addch(&buf, '/'); - baselen = buf.len; - - for (i = 0; i < 256; i++) { - strbuf_addf(&buf, "%02x", i); - prune_dir(i, &buf); - strbuf_setlen(&buf, baselen); - } + if (!show_only) + rmdir(path); + return 0; } /* @@ -171,9 +135,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix) if (show_progress) progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2); - mark_reachable_objects(&revs, 1, progress); + mark_reachable_objects(&revs, 1, expire, progress); stop_progress(&progress); - prune_object_dir(get_object_directory()); + for_each_loose_file_in_objdir(get_object_directory(), prune_object, + prune_cruft, prune_subdir, NULL); prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0); remove_temporary_files(get_object_directory()); diff --git a/builtin/reflog.c b/builtin/reflog.c index b6388f75b0..2d85d260ca 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -649,7 +649,7 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix) init_revisions(&cb.revs, prefix); if (cb.verbose) printf("Marking reachable objects..."); - mark_reachable_objects(&cb.revs, 0, NULL); + mark_reachable_objects(&cb.revs, 0, 0, NULL); if (cb.verbose) putchar('\n'); } diff --git a/builtin/repack.c b/builtin/repack.c index 2aae05d364..28456206c5 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,6 +209,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) argv_array_push(&cmd_args, "--non-empty"); argv_array_push(&cmd_args, "--all"); argv_array_push(&cmd_args, "--reflog"); + argv_array_push(&cmd_args, "--indexed-objects"); if (window) argv_array_pushf(&cmd_args, "--window=%s", window); if (window_memory) @@ -1145,7 +1145,7 @@ extern void prepare_alt_odb(void); extern void read_info_alternates(const char * relative_base, int depth); extern void add_to_alternates_file(const char *reference); typedef int alt_odb_fn(struct alternate_object_database *, void *); -extern void foreach_alt_odb(alt_odb_fn, void*); +extern int foreach_alt_odb(alt_odb_fn, void*); struct pack_window { struct pack_window *next; @@ -1241,6 +1241,50 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); +/* + * Iterate over the files in the loose-object parts of the object + * directory "path", triggering the following callbacks: + * + * - loose_object is called for each loose object we find. + * + * - loose_cruft is called for any files that do not appear to be + * loose objects. Note that we only look in the loose object + * directories "objects/[0-9a-f]{2}/", so we will not report + * "objects/foobar" as cruft. + * + * - loose_subdir is called for each top-level hashed subdirectory + * of the object directory (e.g., "$OBJDIR/f0"). It is called + * after the objects in the directory are processed. + * + * Any callback that is NULL will be ignored. Callbacks returning non-zero + * will end the iteration. + */ +typedef int each_loose_object_fn(const unsigned char *sha1, + const char *path, + void *data); +typedef int each_loose_cruft_fn(const char *basename, + const char *path, + void *data); +typedef int each_loose_subdir_fn(int nr, + const char *path, + void *data); +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); + +/* + * Iterate over loose and packed objects in both the local + * repository and any alternates repositories. + */ +typedef int each_packed_object_fn(const unsigned char *sha1, + struct packed_git *pack, + uint32_t pos, + void *data); +extern int for_each_loose_object(each_loose_object_fn, void *); +extern int for_each_packed_object(each_packed_object_fn, void *); + struct object_info { /* Request */ enum object_type *typep; diff --git a/git-compat-util.h b/git-compat-util.h index 210712728d..fc83339bd7 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -684,7 +684,7 @@ extern const unsigned char sane_ctype[256]; #define iscntrl(x) (sane_istest(x,GIT_CNTRL)) #define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \ GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC) -#define isxdigit(x) (hexval_table[x] != -1) +#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC) diff --git a/list-objects.c b/list-objects.c index 3595ee7a22..2910becd6c 100644 --- a/list-objects.c +++ b/list-objects.c @@ -208,6 +208,7 @@ void traverse_commit_list(struct rev_info *revs, struct object_array_entry *pending = revs->pending.objects + i; struct object *obj = pending->item; const char *name = pending->name; + const char *path = pending->path; if (obj->flags & (UNINTERESTING | SEEN)) continue; if (obj->type == OBJ_TAG) { @@ -215,24 +216,21 @@ void traverse_commit_list(struct rev_info *revs, show_object(obj, NULL, name, data); continue; } + if (!path) + path = ""; if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - NULL, &base, name, data); + NULL, &base, path, data); continue; } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - NULL, name, data); + NULL, path, data); continue; } die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); } - if (revs->pending.nr) { - free(revs->pending.objects); - revs->pending.nr = 0; - revs->pending.alloc = 0; - revs->pending.objects = NULL; - } + object_array_clear(&revs->pending); strbuf_release(&base); } @@ -307,10 +307,9 @@ int object_list_contains(struct object_list *list, struct object *obj) */ static char object_array_slopbuf[1]; -static void add_object_array_with_mode_context(struct object *obj, const char *name, - struct object_array *array, - unsigned mode, - struct object_context *context) +void add_object_array_with_path(struct object *obj, const char *name, + struct object_array *array, + unsigned mode, const char *path) { unsigned nr = array->nr; unsigned alloc = array->alloc; @@ -333,26 +332,27 @@ static void add_object_array_with_mode_context(struct object *obj, const char *n else entry->name = xstrdup(name); entry->mode = mode; - entry->context = context; + if (path) + entry->path = xstrdup(path); + else + entry->path = NULL; array->nr = ++nr; } void add_object_array(struct object *obj, const char *name, struct object_array *array) { - add_object_array_with_mode(obj, name, array, S_IFINVALID); + add_object_array_with_path(obj, name, array, S_IFINVALID, NULL); } -void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode) +/* + * Free all memory associated with an entry; the result is + * in an unspecified state and should not be examined. + */ +static void object_array_release_entry(struct object_array_entry *ent) { - add_object_array_with_mode_context(obj, name, array, mode, NULL); -} - -void add_object_array_with_context(struct object *obj, const char *name, struct object_array *array, struct object_context *context) -{ - if (context) - add_object_array_with_mode_context(obj, name, array, context->mode, context); - else - add_object_array_with_mode_context(obj, name, array, S_IFINVALID, context); + if (ent->name != object_array_slopbuf) + free(ent->name); + free(ent->path); } void object_array_filter(struct object_array *array, @@ -367,13 +367,22 @@ void object_array_filter(struct object_array *array, objects[dst] = objects[src]; dst++; } else { - if (objects[src].name != object_array_slopbuf) - free(objects[src].name); + object_array_release_entry(&objects[src]); } } array->nr = dst; } +void object_array_clear(struct object_array *array) +{ + int i; + for (i = 0; i < array->nr; i++) + object_array_release_entry(&array->objects[i]); + free(array->objects); + array->objects = NULL; + array->nr = array->alloc = 0; +} + /* * Return true iff array already contains an entry with name. */ @@ -400,8 +409,7 @@ void object_array_remove_duplicates(struct object_array *array) objects[array->nr] = objects[src]; array->nr++; } else { - if (objects[src].name != object_array_slopbuf) - free(objects[src].name); + object_array_release_entry(&objects[src]); } } } @@ -18,8 +18,8 @@ struct object_array { * empty string. */ char *name; + char *path; unsigned mode; - struct object_context *context; } *objects; }; @@ -114,8 +114,7 @@ int object_list_contains(struct object_list *list, struct object *obj); /* Object array handling .. */ void add_object_array(struct object *obj, const char *name, struct object_array *array); -void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode); -void add_object_array_with_context(struct object *obj, const char *name, struct object_array *array, struct object_context *context); +void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path); typedef int (*object_array_each_func_t)(struct object_array_entry *, void *); @@ -133,6 +132,12 @@ void object_array_filter(struct object_array *array, */ void object_array_remove_duplicates(struct object_array *array); +/* + * Remove any objects from the array, freeing all used memory; afterwards + * the array is ready to store more objects with add_object_array(). + */ +void object_array_clear(struct object_array *array); + void clear_object_flags(unsigned flags); #endif /* OBJECT_H */ diff --git a/reachable.c b/reachable.c index 6f6835bf27..a647267ae9 100644 --- a/reachable.c +++ b/reachable.c @@ -8,6 +8,7 @@ #include "reachable.h" #include "cache-tree.h" #include "progress.h" +#include "list-objects.h" struct connectivity_progress { struct progress *progress; @@ -21,196 +22,134 @@ static void update_progress(struct connectivity_progress *cp) display_progress(cp->progress, cp->count); } -static void process_blob(struct blob *blob, - struct object_array *p, - struct name_path *path, - const char *name, - struct connectivity_progress *cp) +static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) { - struct object *obj = &blob->object; + struct object *object = parse_object_or_die(sha1, path); + struct rev_info *revs = (struct rev_info *)cb_data; - if (!blob) - die("bad blob object"); - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - update_progress(cp); - /* Nothing to do, really .. The blob lookup was the important part */ + add_pending_object(revs, object, ""); + + return 0; } -static void process_gitlink(const unsigned char *sha1, - struct object_array *p, - struct name_path *path, - const char *name) +/* + * The traversal will have already marked us as SEEN, so we + * only need to handle any progress reporting here. + */ +static void mark_object(struct object *obj, const struct name_path *path, + const char *name, void *data) { - /* I don't think we want to recurse into this, really. */ + update_progress(data); } -static void process_tree(struct tree *tree, - struct object_array *p, - struct name_path *path, - const char *name, - struct connectivity_progress *cp) +static void mark_commit(struct commit *c, void *data) { - struct object *obj = &tree->object; - struct tree_desc desc; - struct name_entry entry; - struct name_path me; - - if (!tree) - die("bad tree object"); - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - update_progress(cp); - if (parse_tree(tree) < 0) - die("bad tree object %s", sha1_to_hex(obj->sha1)); - add_object(obj, p, path, name); - me.up = path; - me.elem = name; - me.elem_len = strlen(name); - - init_tree_desc(&desc, tree->buffer, tree->size); - - while (tree_entry(&desc, &entry)) { - if (S_ISDIR(entry.mode)) - process_tree(lookup_tree(entry.sha1), p, &me, entry.path, cp); - else if (S_ISGITLINK(entry.mode)) - process_gitlink(entry.sha1, p, &me, entry.path); - else - process_blob(lookup_blob(entry.sha1), p, &me, entry.path, cp); - } - free_tree_buffer(tree); + mark_object(&c->object, NULL, NULL, data); } -static void process_tag(struct tag *tag, struct object_array *p, - const char *name, struct connectivity_progress *cp) +struct recent_data { + struct rev_info *revs; + unsigned long timestamp; +}; + +static void add_recent_object(const unsigned char *sha1, + unsigned long mtime, + struct recent_data *data) { - struct object *obj = &tag->object; + struct object *obj; + enum object_type type; - if (obj->flags & SEEN) + if (mtime <= data->timestamp) return; - obj->flags |= SEEN; - update_progress(cp); - if (parse_tag(tag) < 0) - die("bad tag object %s", sha1_to_hex(obj->sha1)); - if (tag->tagged) - add_object(tag->tagged, p, NULL, name); -} - -static void walk_commit_list(struct rev_info *revs, - struct connectivity_progress *cp) -{ - int i; - struct commit *commit; - struct object_array objects = OBJECT_ARRAY_INIT; - - /* Walk all commits, process their trees */ - while ((commit = get_revision(revs)) != NULL) { - process_tree(commit->tree, &objects, NULL, "", cp); - update_progress(cp); - } - - /* Then walk all the pending objects, recursively processing them too */ - for (i = 0; i < revs->pending.nr; i++) { - struct object_array_entry *pending = revs->pending.objects + i; - struct object *obj = pending->item; - const char *name = pending->name; - if (obj->type == OBJ_TAG) { - process_tag((struct tag *) obj, &objects, name, cp); - continue; - } - if (obj->type == OBJ_TREE) { - process_tree((struct tree *)obj, &objects, NULL, name, cp); - continue; - } - if (obj->type == OBJ_BLOB) { - process_blob((struct blob *)obj, &objects, NULL, name, cp); - continue; - } - die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); + /* + * We do not want to call parse_object here, because + * inflating blobs and trees could be very expensive. + * However, we do need to know the correct type for + * later processing, and the revision machinery expects + * commits and tags to have been parsed. + */ + type = sha1_object_info(sha1, NULL); + if (type < 0) + die("unable to get object info for %s", sha1_to_hex(sha1)); + + switch (type) { + case OBJ_TAG: + case OBJ_COMMIT: + obj = parse_object_or_die(sha1, NULL); + break; + case OBJ_TREE: + obj = (struct object *)lookup_tree(sha1); + break; + case OBJ_BLOB: + obj = (struct object *)lookup_blob(sha1); + break; + default: + die("unknown object type for %s: %s", + sha1_to_hex(sha1), typename(type)); } -} -static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1, - const char *email, unsigned long timestamp, int tz, - const char *message, void *cb_data) -{ - struct object *object; - struct rev_info *revs = (struct rev_info *)cb_data; + if (!obj) + die("unable to lookup %s", sha1_to_hex(sha1)); - object = parse_object(osha1); - if (object) - add_pending_object(revs, object, ""); - object = parse_object(nsha1); - if (object) - add_pending_object(revs, object, ""); - return 0; + add_pending_object(data->revs, obj, ""); } -static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) +static int add_recent_loose(const unsigned char *sha1, + const char *path, void *data) { - struct object *object = parse_object_or_die(sha1, path); - struct rev_info *revs = (struct rev_info *)cb_data; + struct stat st; + struct object *obj = lookup_object(sha1); - add_pending_object(revs, object, ""); + if (obj && obj->flags & SEEN) + return 0; - return 0; -} + if (stat(path, &st) < 0) { + /* + * It's OK if an object went away during our iteration; this + * could be due to a simultaneous repack. But anything else + * we should abort, since we might then fail to mark objects + * which should not be pruned. + */ + if (errno == ENOENT) + return 0; + return error("unable to stat %s: %s", + sha1_to_hex(sha1), strerror(errno)); + } -static int add_one_reflog(const char *path, const unsigned char *sha1, int flag, void *cb_data) -{ - for_each_reflog_ent(path, add_one_reflog_ent, cb_data); + add_recent_object(sha1, st.st_mtime, data); return 0; } -static void add_one_tree(const unsigned char *sha1, struct rev_info *revs) +static int add_recent_packed(const unsigned char *sha1, + struct packed_git *p, uint32_t pos, + void *data) { - struct tree *tree = lookup_tree(sha1); - if (tree) - add_pending_object(revs, &tree->object, ""); -} + struct object *obj = lookup_object(sha1); -static void add_cache_tree(struct cache_tree *it, struct rev_info *revs) -{ - int i; - - if (it->entry_count >= 0) - add_one_tree(it->sha1, revs); - for (i = 0; i < it->subtree_nr; i++) - add_cache_tree(it->down[i]->cache_tree, revs); + if (obj && obj->flags & SEEN) + return 0; + add_recent_object(sha1, p->mtime, data); + return 0; } -static void add_cache_refs(struct rev_info *revs) +int add_unseen_recent_objects_to_traversal(struct rev_info *revs, + unsigned long timestamp) { - int i; + struct recent_data data; + int r; - read_cache(); - for (i = 0; i < active_nr; i++) { - /* - * The index can contain blobs and GITLINKs, GITLINKs are hashes - * that don't actually point to objects in the repository, it's - * almost guaranteed that they are NOT blobs, so we don't call - * lookup_blob() on them, to avoid populating the hash table - * with invalid information - */ - if (S_ISGITLINK(active_cache[i]->ce_mode)) - continue; + data.revs = revs; + data.timestamp = timestamp; - lookup_blob(active_cache[i]->sha1); - /* - * We could add the blobs to the pending list, but quite - * frankly, we don't care. Once we've looked them up, and - * added them as objects, we've really done everything - * there is to do for a blob - */ - } - if (active_cache_tree) - add_cache_tree(active_cache_tree, revs); + r = for_each_loose_object(add_recent_loose, &data); + if (r) + return r; + return for_each_packed_object(add_recent_packed, &data); } void mark_reachable_objects(struct rev_info *revs, int mark_reflog, + unsigned long mark_recent, struct progress *progress) { struct connectivity_progress cp; @@ -224,7 +163,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, revs->tree_objects = 1; /* Add all refs from the index file */ - add_cache_refs(revs); + add_index_objects_to_pending(revs, 0); /* Add all external refs */ for_each_ref(add_one_ref, revs); @@ -234,7 +173,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, /* Add all reflog info */ if (mark_reflog) - for_each_reflog(add_one_reflog, revs); + add_reflogs_to_pending(revs, 0); cp.progress = progress; cp.count = 0; @@ -245,6 +184,16 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, */ if (prepare_revision_walk(revs)) die("revision walk setup failed"); - walk_commit_list(revs, &cp); + traverse_commit_list(revs, mark_commit, mark_object, &cp); + + if (mark_recent) { + revs->ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(revs, mark_recent)) + die("unable to mark recent objects"); + if (prepare_revision_walk(revs)) + die("revision walk setup failed"); + traverse_commit_list(revs, mark_commit, mark_object, &cp); + } + display_progress(cp.progress, cp.count); } diff --git a/reachable.h b/reachable.h index 5d082adfec..d23efc36ec 100644 --- a/reachable.h +++ b/reachable.h @@ -2,6 +2,9 @@ #define REACHEABLE_H struct progress; -extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, struct progress *); +extern int add_unseen_recent_objects_to_traversal(struct rev_info *revs, + unsigned long timestamp); +extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, + unsigned long mark_recent, struct progress *); #endif diff --git a/revision.c b/revision.c index e498b7c339..75dda928ea 100644 --- a/revision.c +++ b/revision.c @@ -17,6 +17,7 @@ #include "mailmap.h" #include "commit-slab.h" #include "dir.h" +#include "cache-tree.h" volatile show_early_output_fn_t show_early_output; @@ -86,16 +87,6 @@ void show_object_with_name(FILE *out, struct object *obj, fputc('\n', out); } -void add_object(struct object *obj, - struct object_array *p, - struct name_path *path, - const char *name) -{ - char *pn = path_name(path, name); - add_object_array(obj, pn, p); - free(pn); -} - static void mark_blob_uninteresting(struct blob *blob) { if (!blob) @@ -198,9 +189,10 @@ void mark_parents_uninteresting(struct commit *commit) } } -static void add_pending_object_with_mode(struct rev_info *revs, +static void add_pending_object_with_path(struct rev_info *revs, struct object *obj, - const char *name, unsigned mode) + const char *name, unsigned mode, + const char *path) { if (!obj) return; @@ -220,7 +212,14 @@ static void add_pending_object_with_mode(struct rev_info *revs, if (st) return; } - add_object_array_with_mode(obj, name, &revs->pending, mode); + add_object_array_with_path(obj, name, &revs->pending, mode, path); +} + +static void add_pending_object_with_mode(struct rev_info *revs, + struct object *obj, + const char *name, unsigned mode) +{ + add_pending_object_with_path(revs, obj, name, mode, NULL); } void add_pending_object(struct rev_info *revs, @@ -265,8 +264,12 @@ void add_pending_sha1(struct rev_info *revs, const char *name, } static struct commit *handle_commit(struct rev_info *revs, - struct object *object, const char *name) + struct object_array_entry *entry) { + struct object *object = entry->item; + const char *name = entry->name; + const char *path = entry->path; + unsigned int mode = entry->mode; unsigned long flags = object->flags; /* @@ -285,6 +288,14 @@ static struct commit *handle_commit(struct rev_info *revs, die("bad object %s", sha1_to_hex(tag->tagged->sha1)); } object->flags |= flags; + /* + * We'll handle the tagged object by looping or dropping + * through to the non-tag handlers below. Do not + * propagate data from the tag's pending entry. + */ + name = ""; + path = NULL; + mode = 0; } /* @@ -300,7 +311,7 @@ static struct commit *handle_commit(struct rev_info *revs, revs->limited = 1; } if (revs->show_source && !commit->util) - commit->util = (void *) name; + commit->util = xstrdup(name); return commit; } @@ -316,7 +327,7 @@ static struct commit *handle_commit(struct rev_info *revs, mark_tree_contents_uninteresting(tree); return NULL; } - add_pending_object(revs, object, ""); + add_pending_object_with_path(revs, object, name, mode, path); return NULL; } @@ -328,7 +339,7 @@ static struct commit *handle_commit(struct rev_info *revs, return NULL; if (flags & UNINTERESTING) return NULL; - add_pending_object(revs, object, ""); + add_pending_object_with_path(revs, object, name, mode, path); return NULL; } die("%s is unknown object", name); @@ -1275,7 +1286,7 @@ static int handle_one_reflog(const char *path, const unsigned char *sha1, int fl return 0; } -static void handle_reflog(struct rev_info *revs, unsigned flags) +void add_reflogs_to_pending(struct rev_info *revs, unsigned flags) { struct all_refs_cb cb; cb.all_revs = revs; @@ -1283,6 +1294,53 @@ static void handle_reflog(struct rev_info *revs, unsigned flags) for_each_reflog(handle_one_reflog, &cb); } +static void add_cache_tree(struct cache_tree *it, struct rev_info *revs, + struct strbuf *path) +{ + size_t baselen = path->len; + int i; + + if (it->entry_count >= 0) { + struct tree *tree = lookup_tree(it->sha1); + add_pending_object_with_path(revs, &tree->object, "", + 040000, path->buf); + } + + for (i = 0; i < it->subtree_nr; i++) { + struct cache_tree_sub *sub = it->down[i]; + strbuf_addf(path, "%s%s", baselen ? "/" : "", sub->name); + add_cache_tree(sub->cache_tree, revs, path); + strbuf_setlen(path, baselen); + } + +} + +void add_index_objects_to_pending(struct rev_info *revs, unsigned flags) +{ + int i; + + read_cache(); + for (i = 0; i < active_nr; i++) { + struct cache_entry *ce = active_cache[i]; + struct blob *blob; + + if (S_ISGITLINK(ce->ce_mode)) + continue; + + blob = lookup_blob(ce->sha1); + if (!blob) + die("unable to add index blob to traversal"); + add_pending_object_with_path(revs, &blob->object, "", + ce->ce_mode, ce->name); + } + + if (active_cache_tree) { + struct strbuf path = STRBUF_INIT; + add_cache_tree(active_cache_tree, revs, &path); + strbuf_release(&path); + } +} + static int add_parents_only(struct rev_info *revs, const char *arg_, int flags) { unsigned char sha1[20]; @@ -1633,6 +1691,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg !strcmp(arg, "--reflog") || !strcmp(arg, "--not") || !strcmp(arg, "--no-walk") || !strcmp(arg, "--do-walk") || !strcmp(arg, "--bisect") || starts_with(arg, "--glob=") || + !strcmp(arg, "--indexed-objects") || starts_with(arg, "--exclude=") || starts_with(arg, "--branches=") || starts_with(arg, "--tags=") || starts_with(arg, "--remotes=") || starts_with(arg, "--no-walk=")) @@ -2061,7 +2120,9 @@ static int handle_revision_pseudo_opt(const char *submodule, for_each_glob_ref_in(handle_one_ref, arg + 10, "refs/remotes/", &cb); clear_ref_exclusion(&revs->ref_excludes); } else if (!strcmp(arg, "--reflog")) { - handle_reflog(revs, *flags); + add_reflogs_to_pending(revs, *flags); + } else if (!strcmp(arg, "--indexed-objects")) { + add_index_objects_to_pending(revs, *flags); } else if (!strcmp(arg, "--not")) { *flags ^= UNINTERESTING | BOTTOM; } else if (!strcmp(arg, "--no-walk")) { @@ -2656,26 +2717,26 @@ void reset_revision_walk(void) int prepare_revision_walk(struct rev_info *revs) { - int nr = revs->pending.nr; - struct object_array_entry *e, *list; + int i; + struct object_array old_pending; struct commit_list **next = &revs->commits; - e = list = revs->pending.objects; + memcpy(&old_pending, &revs->pending, sizeof(old_pending)); revs->pending.nr = 0; revs->pending.alloc = 0; revs->pending.objects = NULL; - while (--nr >= 0) { - struct commit *commit = handle_commit(revs, e->item, e->name); + for (i = 0; i < old_pending.nr; i++) { + struct object_array_entry *e = old_pending.objects + i; + struct commit *commit = handle_commit(revs, e); if (commit) { if (!(commit->object.flags & SEEN)) { commit->object.flags |= SEEN; next = commit_list_append(commit, next); } } - e++; } if (!revs->leak_pending) - free(list); + object_array_clear(&old_pending); /* Signal whether we need per-parent treesame decoration */ if (revs->simplify_merges || diff --git a/revision.h b/revision.h index a6205307cf..9cb5adc4ea 100644 --- a/revision.h +++ b/revision.h @@ -264,11 +264,6 @@ char *path_name(const struct name_path *path, const char *name); extern void show_object_with_name(FILE *, struct object *, const struct name_path *, const char *); -extern void add_object(struct object *obj, - struct object_array *p, - struct name_path *path, - const char *name); - extern void add_pending_object(struct rev_info *revs, struct object *obj, const char *name); extern void add_pending_sha1(struct rev_info *revs, @@ -276,6 +271,8 @@ extern void add_pending_sha1(struct rev_info *revs, unsigned int flags); extern void add_head_to_pending(struct rev_info *); +extern void add_reflogs_to_pending(struct rev_info *, unsigned int flags); +extern void add_index_objects_to_pending(struct rev_info *, unsigned int flags); enum commit_action { commit_ignore, diff --git a/sha1_file.c b/sha1_file.c index 83f77f01b6..d7f1838c13 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -413,14 +413,18 @@ void add_to_alternates_file(const char *reference) link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0); } -void foreach_alt_odb(alt_odb_fn fn, void *cb) +int foreach_alt_odb(alt_odb_fn fn, void *cb) { struct alternate_object_database *ent; + int r = 0; prepare_alt_odb(); - for (ent = alt_odb_list; ent; ent = ent->next) - if (fn(ent, cb)) - return; + for (ent = alt_odb_list; ent; ent = ent->next) { + r = fn(ent, cb); + if (r) + break; + } + return r; } void prepare_alt_odb(void) @@ -439,27 +443,53 @@ void prepare_alt_odb(void) read_info_alternates(get_object_directory(), 0); } -static int has_loose_object_local(const unsigned char *sha1) +static int freshen_file(const char *fn) { - return !access(sha1_file_name(sha1), F_OK); + struct utimbuf t; + t.actime = t.modtime = time(NULL); + return !utime(fn, &t); } -int has_loose_object_nonlocal(const unsigned char *sha1) +static int check_and_freshen_file(const char *fn, int freshen) +{ + if (access(fn, F_OK)) + return 0; + if (freshen && freshen_file(fn)) + return 0; + return 1; +} + +static int check_and_freshen_local(const unsigned char *sha1, int freshen) +{ + return check_and_freshen_file(sha1_file_name(sha1), freshen); +} + +static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen) { struct alternate_object_database *alt; prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) { fill_sha1_path(alt->name, sha1); - if (!access(alt->base, F_OK)) + if (check_and_freshen_file(alt->base, freshen)) return 1; } return 0; } +static int check_and_freshen(const unsigned char *sha1, int freshen) +{ + return check_and_freshen_local(sha1, freshen) || + check_and_freshen_nonlocal(sha1, freshen); +} + +int has_loose_object_nonlocal(const unsigned char *sha1) +{ + return check_and_freshen_nonlocal(sha1, 0); +} + static int has_loose_object(const unsigned char *sha1) { - return has_loose_object_local(sha1) || - has_loose_object_nonlocal(sha1); + return check_and_freshen(sha1, 0); } static unsigned int pack_used_ctr; @@ -2962,6 +2992,17 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, return move_temp_to_file(tmp_file, filename); } +static int freshen_loose_object(const unsigned char *sha1) +{ + return check_and_freshen(sha1, 1); +} + +static int freshen_packed_object(const unsigned char *sha1) +{ + struct pack_entry e; + return find_pack_entry(sha1, &e) && freshen_file(e.p->pack_name); +} + int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1) { unsigned char sha1[20]; @@ -2974,7 +3015,7 @@ int write_sha1_file(const void *buf, unsigned long len, const char *type, unsign write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); if (returnsha1) hashcpy(returnsha1, sha1); - if (has_sha1_file(sha1)) + if (freshen_loose_object(sha1) || freshen_packed_object(sha1)) return 0; return write_loose_object(sha1, hdr, hdrlen, buf, len, 0); } @@ -3261,3 +3302,149 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect) die("%s is not a valid '%s' object", sha1_to_hex(sha1), typename(expect)); } + +static int for_each_file_in_obj_subdir(int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + size_t baselen = path->len; + DIR *dir = opendir(path->buf); + struct dirent *de; + int r = 0; + + if (!dir) { + if (errno == ENOENT) + return 0; + return error("unable to open %s: %s", path->buf, strerror(errno)); + } + + while ((de = readdir(dir))) { + if (is_dot_or_dotdot(de->d_name)) + continue; + + strbuf_setlen(path, baselen); + strbuf_addf(path, "/%s", de->d_name); + + if (strlen(de->d_name) == 38) { + char hex[41]; + unsigned char sha1[20]; + + snprintf(hex, sizeof(hex), "%02x%s", + subdir_nr, de->d_name); + if (!get_sha1_hex(hex, sha1)) { + if (obj_cb) { + r = obj_cb(sha1, path->buf, data); + if (r) + break; + } + continue; + } + } + + if (cruft_cb) { + r = cruft_cb(de->d_name, path->buf, data); + if (r) + break; + } + } + strbuf_setlen(path, baselen); + + if (!r && subdir_cb) + r = subdir_cb(subdir_nr, path->buf, data); + + closedir(dir); + return r; +} + +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + struct strbuf buf = STRBUF_INIT; + size_t baselen; + int r = 0; + int i; + + strbuf_addstr(&buf, path); + strbuf_addch(&buf, '/'); + baselen = buf.len; + + for (i = 0; i < 256; i++) { + strbuf_addf(&buf, "%02x", i); + r = for_each_file_in_obj_subdir(i, &buf, obj_cb, cruft_cb, + subdir_cb, data); + strbuf_setlen(&buf, baselen); + if (r) + break; + } + + strbuf_release(&buf); + return r; +} + +struct loose_alt_odb_data { + each_loose_object_fn *cb; + void *data; +}; + +static int loose_from_alt_odb(struct alternate_object_database *alt, + void *vdata) +{ + struct loose_alt_odb_data *data = vdata; + return for_each_loose_file_in_objdir(alt->base, + data->cb, NULL, NULL, + data->data); +} + +int for_each_loose_object(each_loose_object_fn cb, void *data) +{ + struct loose_alt_odb_data alt; + int r; + + r = for_each_loose_file_in_objdir(get_object_directory(), + cb, NULL, NULL, data); + if (r) + return r; + + alt.cb = cb; + alt.data = data; + return foreach_alt_odb(loose_from_alt_odb, &alt); +} + +static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) +{ + uint32_t i; + int r = 0; + + for (i = 0; i < p->num_objects; i++) { + const unsigned char *sha1 = nth_packed_object_sha1(p, i); + + if (!sha1) + return error("unable to get sha1 of object %u in %s", + i, p->pack_name); + + r = cb(sha1, p, i, data); + if (r) + break; + } + return r; +} + +int for_each_packed_object(each_packed_object_fn cb, void *data) +{ + struct packed_git *p; + int r = 0; + + prepare_packed_git(); + for (p = packed_git; p; p = p->next) { + r = for_each_object_in_pack(p, cb, data); + if (r) + break; + } + return r; +} diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 67e0ab3462..7c8a769a90 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1277,4 +1277,17 @@ EOF git push --no-thin --receive-pack="$rcvpck" no-thin/.git refs/heads/master:refs/heads/foo ' +test_expect_success 'pushing a tag pushes the tagged object' ' + rm -rf dst.git && + blob=$(echo unreferenced | git hash-object -w --stdin) && + git tag -m foo tag-of-blob $blob && + git init --bare dst.git && + git push dst.git tag-of-blob && + # the receiving index-pack should have noticed + # any problems, but we double check + echo unreferenced >expect && + git --git-dir=dst.git cat-file blob tag-of-blob >actual && + test_cmp expect actual +' + test_done diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh index 3794e4ceaf..2602086303 100755 --- a/t/t6000-rev-list-misc.sh +++ b/t/t6000-rev-list-misc.sh @@ -73,4 +73,27 @@ test_expect_success 'symleft flag bit is propagated down from tag' ' test_cmp expect actual ' +test_expect_success 'rev-list can show index objects' ' + # Of the blobs and trees in the index, note: + # + # - we do not show two/three, because it is the + # same blob as "one", and we show objects only once + # + # - we do show the tree "two", because it has a valid cache tree + # from the last commit + # + # - we do not show the root tree; since we updated the index, it + # does not have a valid cache tree + # + cat >expect <<-\EOF + 8e4020bb5a8d8c873b25de15933e75cc0fc275df one + d9d3a7417b9605cfd88ee6306b28dadc29e6ab08 only-in-index + 9200b628cf9dc883a85a7abc8d6e6730baee589c two + EOF + echo only-in-index >only-in-index && + git add only-in-index && + git rev-list --objects --indexed-objects >actual && + test_cmp expect actual +' + test_done diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh new file mode 100755 index 0000000000..157f3f91db --- /dev/null +++ b/t/t6501-freshen-objects.sh @@ -0,0 +1,132 @@ +#!/bin/sh +# +# This test covers the handling of objects which might have old +# mtimes in the filesystem (because they were used previously) +# and are just now becoming referenced again. +# +# We're going to do two things that are a little bit "fake" to +# help make our simulation easier: +# +# 1. We'll turn off reflogs. You can still run into +# problems with reflogs on, but your objects +# don't get pruned until both the reflog expiration +# has passed on their references, _and_ they are out +# of prune's expiration period. Dropping reflogs +# means we only have to deal with one variable in our tests, +# but the results generalize. +# +# 2. We'll use a temporary index file to create our +# works-in-progress. Most workflows would mention +# referenced objects in the index, which prune takes +# into account. However, many operations don't. For +# example, a partial commit with "git commit foo" +# will use a temporary index. Or they may not need +# an index at all (e.g., creating a new commit +# to refer to an existing tree). + +test_description='check pruning of dependent objects' +. ./test-lib.sh + +# We care about reachability, so we do not want to use +# the normal test_commit, which creates extra tags. +add () { + echo "$1" >"$1" && + git add "$1" +} +commit () { + test_tick && + add "$1" && + git commit -m "$1" +} + +maybe_repack () { + if test -n "$repack"; then + git repack -ad + fi +} + +for repack in '' true; do + title=${repack:+repack} + title=${title:-loose} + + test_expect_success "make repo completely empty ($title)" ' + rm -rf .git && + git init + ' + + test_expect_success "disable reflogs ($title)" ' + git config core.logallrefupdates false && + rm -rf .git/logs + ' + + test_expect_success "setup basic history ($title)" ' + commit base + ' + + test_expect_success "create and abandon some objects ($title)" ' + git checkout -b experiment && + commit abandon && + maybe_repack && + git checkout master && + git branch -D experiment + ' + + test_expect_success "simulate time passing ($title)" ' + find .git/objects -type f | + xargs test-chmtime -v -86400 + ' + + test_expect_success "start writing new commit with old blob ($title)" ' + tree=$( + GIT_INDEX_FILE=index.tmp && + export GIT_INDEX_FILE && + git read-tree HEAD && + add unrelated && + add abandon && + git write-tree + ) + ' + + test_expect_success "simultaneous gc ($title)" ' + git gc --prune=12.hours.ago + ' + + test_expect_success "finish writing out commit ($title)" ' + commit=$(echo foo | git commit-tree -p HEAD $tree) && + git update-ref HEAD $commit + ' + + # "abandon" blob should have been rescued by reference from new tree + test_expect_success "repository passes fsck ($title)" ' + git fsck + ' + + test_expect_success "abandon objects again ($title)" ' + git reset --hard HEAD^ && + find .git/objects -type f | + xargs test-chmtime -v -86400 + ' + + test_expect_success "start writing new commit with same tree ($title)" ' + tree=$( + GIT_INDEX_FILE=index.tmp && + export GIT_INDEX_FILE && + git read-tree HEAD && + add abandon && + add unrelated && + git write-tree + ) + ' + + test_expect_success "simultaneous gc ($title)" ' + git gc --prune=12.hours.ago + ' + + # tree should have been refreshed by write-tree + test_expect_success "finish writing out commit ($title)" ' + commit=$(echo foo | git commit-tree -p HEAD $tree) && + git update-ref HEAD $commit + ' +done + +test_done diff --git a/t/t7701-repack-unpack-unreachable.sh b/t/t7701-repack-unpack-unreachable.sh index b8d4cdea8c..aad8a9c64d 100755 --- a/t/t7701-repack-unpack-unreachable.sh +++ b/t/t7701-repack-unpack-unreachable.sh @@ -109,4 +109,17 @@ test_expect_success 'do not bother loosening old objects' ' test_must_fail git cat-file -p $obj2 ' +test_expect_success 'keep packed objects found only in index' ' + echo my-unique-content >file && + git add file && + git commit -m "make it reachable" && + git gc && + git reset HEAD^ && + git reflog expire --expire=now --all && + git add file && + test-chmtime =-86400 .git/objects/pack/* && + git gc --prune=1.hour.ago && + git cat-file blob :file +' + test_done diff --git a/urlmatch.c b/urlmatch.c index 3d4c54b5cd..618d216491 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -43,11 +43,11 @@ static int append_normalized_escapes(struct strbuf *buf, from_len--; if (ch == '%') { if (from_len < 2 || - !isxdigit((unsigned char)from[0]) || - !isxdigit((unsigned char)from[1])) + !isxdigit(from[0]) || + !isxdigit(from[1])) return 0; - ch = hexval_table[(unsigned char)*from++] << 4; - ch |= hexval_table[(unsigned char)*from++]; + ch = hexval(*from++) << 4; + ch |= hexval(*from++); from_len -= 2; was_esc = 1; } |