From a3ed7552d6c800b476a3bdfec5fa575295ea4bdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 7 Nov 2011 09:59:23 +0700 Subject: fsck: return error code when verify_pack() goes wrong MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/fsck.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index df1a88b51a..4ead98dcab 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -29,6 +29,7 @@ static int write_lost_and_found; static int verbose; #define ERROR_OBJECT 01 #define ERROR_REACHABLE 02 +#define ERROR_PACK 04 #ifdef NO_D_INO_IN_DIRENT #define SORT_DIRENT 0 @@ -626,7 +627,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) prepare_packed_git(); for (p = packed_git; p; p = p->next) /* verify gives error messages itself */ - verify_pack(p); + if (verify_pack(p)) + errors_found |= ERROR_PACK; for (p = packed_git; p; p = p->next) { uint32_t j, num; -- cgit v1.2.1 From 473935188cc9b0363b30553f23d816231a66cb44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 7 Nov 2011 09:59:24 +0700 Subject: verify_packfile(): check as many objects as possible in a pack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit verify_packfile() checks for whole pack integrity first, then each object individually. Once we get past whole pack check, we can identify all objects in the pack. If there's an error with one object, we should continue to check the next objects to salvage as many objects as possible instead of stopping the process. 
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- pack-check.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pack-check.c b/pack-check.c index 0c19b6e5a5..7ac9b3a681 100644 --- a/pack-check.c +++ b/pack-check.c @@ -113,18 +113,13 @@ static int verify_packfile(struct packed_git *p, p->pack_name, (uintmax_t)offset); } data = unpack_entry(p, entries[i].offset, &type, &size); - if (!data) { + if (!data) err = error("cannot unpack %s from %s at offset %"PRIuMAX"", sha1_to_hex(entries[i].sha1), p->pack_name, (uintmax_t)entries[i].offset); - break; - } - if (check_sha1_signature(entries[i].sha1, data, size, typename(type))) { + else if (check_sha1_signature(entries[i].sha1, data, size, typename(type))) err = error("packed %s from %s is corrupt", sha1_to_hex(entries[i].sha1), p->pack_name); - free(data); - break; - } free(data); } free(entries); -- cgit v1.2.1 From c9486eb04dd99fc00df3e68f9b908f9ad7ff9728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 7 Nov 2011 09:59:25 +0700 Subject: fsck: avoid reading every object twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During verify_pack() all objects are read for SHA-1 check. Then fsck_sha1() is called on every object, which reads the object again (fsck_sha1 -> parse_object -> read_sha1_file). Avoid reading an object twice: do fsck_sha1 while we have the object's uncompressed data in verify_pack. 
On git.git, with this patch I got: $ /usr/bin/time ./git fsck >/dev/null 98.97user 0.90system 1:40.01elapsed 99%CPU (0avgtext+0avgdata 616624maxresident)k 0inputs+0outputs (0major+194186minor)pagefaults 0swaps Without it: $ /usr/bin/time ./git fsck >/dev/null 231.23user 2.35system 3:53.82elapsed 99%CPU (0avgtext+0avgdata 636688maxresident)k 0inputs+0outputs (0major+461629minor)pagefaults 0swaps Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/fsck.c | 42 +++++++++++++++++++++++++----------------- pack-check.c | 13 ++++++++++--- pack.h | 5 ++++- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 4ead98dcab..0603f6444e 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -282,14 +282,8 @@ static void check_connectivity(void) } } -static int fsck_sha1(const unsigned char *sha1) +static int fsck_obj(struct object *obj) { - struct object *obj = parse_object(sha1); - if (!obj) { - errors_found |= ERROR_OBJECT; - return error("%s: object corrupt or missing", - sha1_to_hex(sha1)); - } if (obj->flags & SEEN) return 0; obj->flags |= SEEN; @@ -332,6 +326,29 @@ static int fsck_sha1(const unsigned char *sha1) return 0; } +static int fsck_sha1(const unsigned char *sha1) +{ + struct object *obj = parse_object(sha1); + if (!obj) { + errors_found |= ERROR_OBJECT; + return error("%s: object corrupt or missing", + sha1_to_hex(sha1)); + } + return fsck_obj(obj); +} + +static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, + unsigned long size, void *buffer, int *eaten) +{ + struct object *obj; + obj = parse_object_buffer(sha1, type, size, buffer, eaten); + if (!obj) { + errors_found |= ERROR_OBJECT; + return error("%s: object corrupt or missing", sha1_to_hex(sha1)); + } + return fsck_obj(obj); +} + /* * This is the sorting chunk size: make it reasonably * big so that we can sort well.. 
@@ -627,17 +644,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) prepare_packed_git(); for (p = packed_git; p; p = p->next) /* verify gives error messages itself */ - if (verify_pack(p)) + if (verify_pack(p, fsck_obj_buffer)) errors_found |= ERROR_PACK; - - for (p = packed_git; p; p = p->next) { - uint32_t j, num; - if (open_pack_index(p)) - continue; - num = p->num_objects; - for (j = 0; j < num; j++) - fsck_sha1(nth_packed_object_sha1(p, j)); - } } heads = 0; diff --git a/pack-check.c b/pack-check.c index 7ac9b3a681..3b48b86156 100644 --- a/pack-check.c +++ b/pack-check.c @@ -42,7 +42,8 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, } static int verify_packfile(struct packed_git *p, - struct pack_window **w_curs) + struct pack_window **w_curs, + verify_fn fn) { off_t index_size = p->index_size; const unsigned char *index_base = p->index_data; @@ -120,6 +121,12 @@ static int verify_packfile(struct packed_git *p, else if (check_sha1_signature(entries[i].sha1, data, size, typename(type))) err = error("packed %s from %s is corrupt", sha1_to_hex(entries[i].sha1), p->pack_name); + else if (fn) { + int eaten = 0; + fn(entries[i].sha1, type, size, data, &eaten); + if (eaten) + data = NULL; + } free(data); } free(entries); @@ -150,7 +157,7 @@ int verify_pack_index(struct packed_git *p) return err; } -int verify_pack(struct packed_git *p) +int verify_pack(struct packed_git *p, verify_fn fn) { int err = 0; struct pack_window *w_curs = NULL; @@ -159,7 +166,7 @@ int verify_pack(struct packed_git *p) if (!p->index_data) return -1; - err |= verify_packfile(p, &w_curs); + err |= verify_packfile(p, &w_curs, fn); unuse_pack(&w_curs); return err; diff --git a/pack.h b/pack.h index 722a54e00a..70f3c29bea 100644 --- a/pack.h +++ b/pack.h @@ -70,10 +70,13 @@ struct pack_idx_entry { off_t offset; }; + +typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*); + extern const char *write_idx_file(const char 
*index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, unsigned char *sha1); extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr); extern int verify_pack_index(struct packed_git *); -extern int verify_pack(struct packed_git *); +extern int verify_pack(struct packed_git *, verify_fn fn); extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t); extern char *index_pack_lockfile(int fd); extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *); -- cgit v1.2.1 From 1e49f22f07881dffc04f8f09d4ad4e4a65b85b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 7 Nov 2011 09:59:26 +0700 Subject: fsck: print progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fsck is usually a long process and it would be nice if it prints progress from time to time. Progress meter is not printed when --verbose is given because --verbose prints a lot, there's no need for "alive" indicator. Progress meter may provide "% complete" information but it would be lost anyway in the flood of text. 
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-fsck.txt | 11 ++++++++++- builtin/fsck.c | 40 ++++++++++++++++++++++++++++++++++++++-- pack-check.c | 14 +++++++++++--- pack.h | 3 ++- 4 files changed, 61 insertions(+), 7 deletions(-) diff --git a/Documentation/git-fsck.txt b/Documentation/git-fsck.txt index a2a508dc28..0a17b4258e 100644 --- a/Documentation/git-fsck.txt +++ b/Documentation/git-fsck.txt @@ -10,7 +10,8 @@ SYNOPSIS -------- [verse] 'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs] - [--[no-]full] [--strict] [--verbose] [--lost-found] [<object>*] + [--[no-]full] [--strict] [--verbose] [--lost-found] + [--[no-]progress] [<object>*] DESCRIPTION ----------- @@ -72,6 +73,14 @@ index file, all SHA1 references in .git/refs/*, and all reflogs (unless a blob, the contents are written into the file, rather than its object name. +--progress:: +--no-progress:: + Progress status is reported on the standard error stream by + default when it is attached to a terminal, unless + --no-progress or --verbose is specified. --progress forces + progress status even if the standard error stream is not + directed to a terminal. + It tests SHA1 and general object sanity, and it does full tracking of the resulting reachability and everything else. 
It prints out any corruption it finds (missing or bad objects), and if you use the diff --git a/builtin/fsck.c b/builtin/fsck.c index 0603f6444e..30d0dc82f0 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -11,6 +11,7 @@ #include "fsck.h" #include "parse-options.h" #include "dir.h" +#include "progress.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@ -27,6 +28,7 @@ static const char *head_points_at; static int errors_found; static int write_lost_and_found; static int verbose; +static int show_progress = -1; #define ERROR_OBJECT 01 #define ERROR_REACHABLE 02 #define ERROR_PACK 04 @@ -138,7 +140,11 @@ static int traverse_one_object(struct object *obj) static int traverse_reachable(void) { + struct progress *progress = NULL; + unsigned int nr = 0; int result = 0; + if (show_progress) + progress = start_progress_delay("Checking connectivity", 0, 0, 2); while (pending.nr) { struct object_array_entry *entry; struct object *obj; @@ -146,7 +152,9 @@ static int traverse_reachable(void) entry = pending.objects + --pending.nr; obj = entry->item; result |= traverse_one_object(obj); + display_progress(progress, ++nr); } + stop_progress(&progress); return !!result; } @@ -530,15 +538,20 @@ static void get_default_heads(void) static void fsck_object_dir(const char *path) { int i; + struct progress *progress = NULL; if (verbose) fprintf(stderr, "Checking object directory\n"); + if (show_progress) + progress = start_progress("Checking object directories", 256); for (i = 0; i < 256; i++) { static char dir[4096]; sprintf(dir, "%s/%02x", path, i); fsck_dir(i, dir); + display_progress(progress, i+1); } + stop_progress(&progress); fsck_sha1_list(); } @@ -609,6 +622,7 @@ static struct option fsck_opts[] = { OPT_BOOLEAN(0, "strict", &check_strict, "enable more strict checking"), OPT_BOOLEAN(0, "lost-found", &write_lost_and_found, "write dangling objects in .git/lost-found"), + OPT_BOOL(0, "progress", &show_progress, "show progress"), OPT_END(), }; @@ -621,6 +635,12 @@ int cmd_fsck(int 
argc, const char **argv, const char *prefix) read_replace_refs = 0; argc = parse_options(argc, argv, prefix, fsck_opts, fsck_usage, 0); + + if (show_progress == -1) + show_progress = isatty(2); + if (verbose) + show_progress = 0; + if (write_lost_and_found) { check_full = 1; include_reflogs = 0; @@ -640,12 +660,28 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) if (check_full) { struct packed_git *p; + uint32_t total = 0, count = 0; + struct progress *progress = NULL; prepare_packed_git(); - for (p = packed_git; p; p = p->next) + + if (show_progress) { + for (p = packed_git; p; p = p->next) { + if (open_pack_index(p)) + continue; + total += p->num_objects; + } + + progress = start_progress("Checking objects", total); + } + for (p = packed_git; p; p = p->next) { /* verify gives error messages itself */ - if (verify_pack(p, fsck_obj_buffer)) + if (verify_pack(p, fsck_obj_buffer, + progress, count)) errors_found |= ERROR_PACK; + count += p->num_objects; + } + stop_progress(&progress); } heads = 0; diff --git a/pack-check.c b/pack-check.c index 3b48b86156..63a595c45c 100644 --- a/pack-check.c +++ b/pack-check.c @@ -1,6 +1,7 @@ #include "cache.h" #include "pack.h" #include "pack-revindex.h" +#include "progress.h" struct idx_entry { off_t offset; @@ -43,7 +44,9 @@ int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, static int verify_packfile(struct packed_git *p, struct pack_window **w_curs, - verify_fn fn) + verify_fn fn, + struct progress *progress, uint32_t base_count) + { off_t index_size = p->index_size; const unsigned char *index_base = p->index_data; @@ -127,8 +130,12 @@ static int verify_packfile(struct packed_git *p, if (eaten) data = NULL; } + if (((base_count + i) & 1023) == 0) + display_progress(progress, base_count + i); free(data); + } + display_progress(progress, base_count + i); free(entries); return err; @@ -157,7 +164,8 @@ int verify_pack_index(struct packed_git *p) return err; } -int verify_pack(struct packed_git *p, 
verify_fn fn) +int verify_pack(struct packed_git *p, verify_fn fn, + struct progress *progress, uint32_t base_count) { int err = 0; struct pack_window *w_curs = NULL; @@ -166,7 +174,7 @@ int verify_pack(struct packed_git *p, verify_fn fn) if (!p->index_data) return -1; - err |= verify_packfile(p, &w_curs, fn); + err |= verify_packfile(p, &w_curs, fn, progress, base_count); unuse_pack(&w_curs); return err; diff --git a/pack.h b/pack.h index 70f3c29bea..324a1d73c0 100644 --- a/pack.h +++ b/pack.h @@ -71,12 +71,13 @@ struct pack_idx_entry { }; +struct progress; typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*); extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, unsigned char *sha1); extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr); extern int verify_pack_index(struct packed_git *); -extern int verify_pack(struct packed_git *, verify_fn fn); +extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t); extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t); extern char *index_pack_lockfile(int fd); extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *); -- cgit v1.2.1