From eb32d236df0c16b936b04f0c5402addb61cdb311 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:06:49 -0400 Subject: introduce delta objects with offset to base This adds a new object, namely OBJ_OFS_DELTA, renames OBJ_DELTA to OBJ_REF_DELTA to better make the distinction between those two delta objects, and adds support for the handling of those new delta objects in sha1_file.c only. The OBJ_OFS_DELTA contains a relative offset from the delta object's position in a pack instead of the 20-byte SHA1 reference to identify the base object. Since the base is likely to be not so far away, the relative offset is more likely to have a smaller encoding on average than an absolute offset. And for those delta objects the base must always be stored first because there is no way to know the distance of later objects when streaming a pack. Hence this relative offset is always meant to be negative. The offset encoding is slightly denser than the one used for object size -- credits to (whoever this is) for bringing it to my attention. This allows for pack size reduction between 3.2% (Linux-2.6) to over 5% (linux-historic). Runtime pack access should be faster too since delta replay does skip a search in the pack index for each delta in a chain. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index 80bc6cb45b..aef7f0a32e 100644 --- a/index-pack.c +++ b/index-pack.c @@ -158,7 +158,7 @@ static void *unpack_raw_entry(unsigned long offset, } switch (type) { - case OBJ_DELTA: + case OBJ_REF_DELTA: if (pos + 20 >= pack_limit) bad_object(offset, "object extends past end of pack"); hashcpy(delta_base, pack_base + pos); @@ -301,7 +301,7 @@ static void parse_pack_objects(void) data = unpack_raw_entry(offset, &obj->type, &data_size, base_sha1, &offset); obj->real_type = obj->type; - if (obj->type == OBJ_DELTA) { + if (obj->type == OBJ_REF_DELTA) { struct delta_entry *delta = &deltas[nr_deltas++]; delta->obj = obj; hashcpy(delta->base_sha1, base_sha1); @@ -328,7 +328,7 @@ static void parse_pack_objects(void) struct object_entry *obj = &objects[i]; int j, first, last; - if (obj->type == OBJ_DELTA) + if (obj->type == OBJ_REF_DELTA) continue; if (find_deltas_based_on_sha1(obj->sha1, &first, &last)) continue; @@ -341,7 +341,7 @@ static void parse_pack_objects(void) /* Check for unresolved deltas */ for (i = 0; i < nr_deltas; i++) { - if (deltas[i].obj->real_type == OBJ_DELTA) + if (deltas[i].obj->real_type == OBJ_REF_DELTA) die("packfile '%s' has unresolved deltas", pack_name); } } -- cgit v1.2.1 From 53dda6ff6263a3f350514d9edae600468c946ed4 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:08:33 -0400 Subject: teach git-index-pack about deltas with offset to base Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 111 ++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 80 insertions(+), 31 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index aef7f0a32e..fffddd25c9 100644 --- a/index-pack.c +++ b/index-pack.c @@ -18,10 +18,15 @@ struct object_entry unsigned char sha1[20]; }; +union delta_base { + unsigned char sha1[20]; + unsigned long offset; +}; + struct delta_entry { struct object_entry *obj; - unsigned char base_sha1[20]; + union delta_base base; }; static const char *pack_name; @@ -134,13 +139,13 @@ static void *unpack_entry_data(unsigned long offset, static void *unpack_raw_entry(unsigned long offset, enum object_type *obj_type, unsigned long *obj_size, - unsigned char *delta_base, + union delta_base *delta_base, unsigned long *next_obj_offset) { unsigned long pack_limit = pack_size - 20; unsigned long pos = offset; unsigned char c; - unsigned long size; + unsigned long size, base_offset; unsigned shift; enum object_type type; void *data; @@ -161,26 +166,43 @@ static void *unpack_raw_entry(unsigned long offset, case OBJ_REF_DELTA: if (pos + 20 >= pack_limit) bad_object(offset, "object extends past end of pack"); - hashcpy(delta_base, pack_base + pos); + hashcpy(delta_base->sha1, pack_base + pos); pos += 20; - /* fallthru */ + break; + case OBJ_OFS_DELTA: + memset(delta_base, 0, sizeof(*delta_base)); + c = pack_base[pos++]; + base_offset = c & 127; + while (c & 128) { + base_offset += 1; + if (!base_offset || base_offset & ~(~0UL >> 7)) + bad_object(offset, "offset value overflow for delta base object"); + if (pos >= pack_limit) + bad_object(offset, "object extends past end of pack"); + c = pack_base[pos++]; + base_offset = (base_offset << 7) + (c & 127); + } + delta_base->offset = offset - base_offset; + if (delta_base->offset >= offset) + bad_object(offset, "delta base offset is out of bound"); + break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - data = unpack_entry_data(offset, &pos, size); break; default: bad_object(offset, "bad object type %d", type); } + data = unpack_entry_data(offset, &pos, size); *obj_type = type; *obj_size = size; *next_obj_offset = pos; return data; } -static int find_delta(const unsigned char *base_sha1) +static int find_delta(const union delta_base *base) { int first = 0, last = nr_deltas; @@ -189,7 +211,7 @@ static int find_delta(const unsigned char *base_sha1) struct delta_entry *delta = &deltas[next]; int cmp; - cmp = hashcmp(base_sha1, delta->base_sha1); + cmp = memcmp(base, &delta->base, sizeof(*base)); if (!cmp) return next; if (cmp < 0) { @@ -201,18 +223,18 @@ static int find_delta(const unsigned char *base_sha1) return -first-1; } -static int find_deltas_based_on_sha1(const unsigned char *base_sha1, - int *first_index, int *last_index) +static int find_delta_childs(const union delta_base *base, + int *first_index, int *last_index) { - int first = find_delta(base_sha1); + int first = find_delta(base); int last = first; int end = nr_deltas - 1; if (first < 0) return -1; - while (first > 0 && !hashcmp(deltas[first - 1].base_sha1, base_sha1)) + while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base))) --first; - while (last < end && !hashcmp(deltas[last + 1].base_sha1, base_sha1)) + while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base))) ++last; *first_index = first; *last_index = last; @@ -253,13 +275,13 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, void *result; unsigned long result_size; enum object_type delta_type; - unsigned char base_sha1[20]; + union delta_base delta_base; unsigned long next_obj_offset; int j, first, last; obj->real_type = type; delta_data = unpack_raw_entry(obj->offset, &delta_type, - &delta_size, base_sha1, + &delta_size, &delta_base, &next_obj_offset); result = patch_delta(base_data, base_size, delta_data, delta_size, &result_size); @@ -267,10 +289,22 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, if (!result) bad_object(obj->offset, "failed to apply delta"); sha1_object(result, result_size, type, obj->sha1); - if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) { + + hashcpy(delta_base.sha1, obj->sha1); + if (!find_delta_childs(&delta_base, &first, &last)) { for (j = first; j <= last; j++) - resolve_delta(&deltas[j], result, result_size, type); + if (deltas[j].obj->type == OBJ_REF_DELTA) + resolve_delta(&deltas[j], result, result_size, type); } + + memset(&delta_base, 0, sizeof(delta_base)); + delta_base.offset = obj->offset; + if (!find_delta_childs(&delta_base, &first, &last)) { + for (j = first; j <= last; j++) + if (deltas[j].obj->type == OBJ_OFS_DELTA) + resolve_delta(&deltas[j], result, result_size, type); + } + free(result); } @@ -278,14 +312,14 @@ static int compare_delta_entry(const void *a, const void *b) { const struct delta_entry *delta_a = a; const struct delta_entry *delta_b = b; - return hashcmp(delta_a->base_sha1, delta_b->base_sha1); + return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base)); } static void parse_pack_objects(void) { int i; unsigned long offset = sizeof(struct pack_header); - unsigned char base_sha1[20]; + struct delta_entry *delta = deltas; void *data; unsigned long data_size; @@ -299,12 +333,12 @@ static void parse_pack_objects(void) struct object_entry *obj = &objects[i]; obj->offset = offset; data = unpack_raw_entry(offset, &obj->type, &data_size, - base_sha1, &offset); + &delta->base, &offset); obj->real_type = obj->type; - if (obj->type == OBJ_REF_DELTA) { - struct delta_entry *delta = &deltas[nr_deltas++]; + if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { + nr_deltas++; delta->obj = obj; - hashcpy(delta->base_sha1, base_sha1); + delta++; } else sha1_object(data, data_size, obj->type, obj->sha1); free(data); @@ -312,7 +346,7 @@ static void parse_pack_objects(void) if (offset != pack_size - 20) die("packfile '%s' has junk at the end", pack_name); - /* Sort deltas by base SHA1 for fast searching */ + /* Sort deltas by base SHA1/offset for fast searching */ qsort(deltas, nr_deltas, sizeof(struct delta_entry), compare_delta_entry); @@ -326,22 +360,37 @@ static void parse_pack_objects(void) */ for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - int j, first, last; + union delta_base base; + int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; - if (obj->type == OBJ_REF_DELTA) + if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; - if (find_deltas_based_on_sha1(obj->sha1, &first, &last)) + hashcpy(base.sha1, obj->sha1); + ref = !find_delta_childs(&base, &ref_first, &ref_last); + memset(&base, 0, sizeof(base)); + base.offset = obj->offset; + ofs = !find_delta_childs(&base, &ofs_first, &ofs_last); + if (!ref && !ofs) continue; data = unpack_raw_entry(obj->offset, &obj->type, &data_size, - base_sha1, &offset); - for (j = first; j <= last; j++) - resolve_delta(&deltas[j], data, data_size, obj->type); + &base, &offset); + if (ref) + for (j = ref_first; j <= ref_last; j++) + if (deltas[j].obj->type == OBJ_REF_DELTA) + resolve_delta(&deltas[j], data, + data_size, obj->type); + if (ofs) + for (j = ofs_first; j <= ofs_last; j++) + if (deltas[j].obj->type == OBJ_OFS_DELTA) + resolve_delta(&deltas[j], data, + data_size, obj->type); free(data); } /* Check for unresolved deltas */ for (i = 0; i < nr_deltas; i++) { - if (deltas[i].obj->real_type == OBJ_REF_DELTA) + if (deltas[i].obj->real_type == OBJ_REF_DELTA || + deltas[i].obj->real_type == OBJ_OFS_DELTA) die("packfile '%s' has unresolved deltas", pack_name); } } -- cgit v1.2.1 From 3c552873c698117689af4e5159c7e491fe3a89a3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 17 Oct 2006 16:23:26 -0400 Subject: index-pack: compare only the first 20-bytes of the key. The "union delta_base" is a strange beast. It is a 20-byte binary blob key to search a binary searchable deltas[] array, each element of which uses it to represent its base object with either a full 20-byte SHA-1 or an offset in the pack. Which representation is used is determined by another field of the deltas[] array element, obj->type, so there is no room for confusion, as long as we make sure we compare the keys for the same type only with appropriate length. The code compared the full union with memcmp(). When storing the in-pack offset, the union was first cleared before storing an unsigned long, so comparison worked fine. On 64-bit architectures, however, the union typically is 24-byte long; the code did not clear the remaining 4-byte alignment padding when storing a full 20-byte SHA-1 representation. Using memcmp() to compare the whole union was wrong. This fixes the comparison to look at the first 20-bytes of the union, regardless of the architecture. As long as ulong is smaller than 20-bytes this works fine. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index fffddd25c9..56c590e3fa 100644 --- a/index-pack.c +++ b/index-pack.c @@ -23,6 +23,12 @@ union delta_base { unsigned long offset; }; +/* + * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want + * to memcmp() only the first 20 bytes. + */ +#define UNION_BASE_SZ 20 + struct delta_entry { struct object_entry *obj; @@ -211,7 +217,7 @@ static int find_delta(const union delta_base *base) struct delta_entry *delta = &deltas[next]; int cmp; - cmp = memcmp(base, &delta->base, sizeof(*base)); + cmp = memcmp(base, &delta->base, UNION_BASE_SZ); if (!cmp) return next; if (cmp < 0) { @@ -232,9 +238,9 @@ static int find_delta_childs(const union delta_base *base, if (first < 0) return -1; - while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base))) + while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ)) --first; - while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base))) + while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ)) ++last; *first_index = first; *last_index = last; @@ -312,7 +318,7 @@ static int compare_delta_entry(const void *a, const void *b) { const struct delta_entry *delta_a = a; const struct delta_entry *delta_b = b; - return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base)); + return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ); } static void parse_pack_objects(void) -- cgit v1.2.1 From 2d477051ef260aad352d63fc7d9c07e4ebb4359b Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 20 Oct 2006 14:45:21 -0400 Subject: add the capability for index-pack to read from a stream This patch only adds the streaming capability to index-pack. Although the code is different it has the exact same functionality as before to make sure nothing broke. This is in preparation for receiving packs over the net, parse them on the fly, fix them up if they are "thin" packs, and keep the resulting pack instead of exploding it into loose objects. But such functionality should come separately. One immediate advantage of this patch is that index-pack can now deal with packs up to 4GB in size even on 32-bit architectures since the pack is not entirely mmap()'d all at once anymore. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- index-pack.c | 244 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 143 insertions(+), 101 deletions(-) (limited to 'index-pack.c') diff --git a/index-pack.c b/index-pack.c index 56c590e3fa..e33f60524f 100644 --- a/index-pack.c +++ b/index-pack.c @@ -13,6 +13,8 @@ static const char index_pack_usage[] = struct object_entry { unsigned long offset; + unsigned long size; + unsigned int hdr_size; enum object_type type; enum object_type real_type; unsigned char sha1[20]; @@ -36,51 +38,68 @@ struct delta_entry }; static const char *pack_name; -static unsigned char *pack_base; -static unsigned long pack_size; static struct object_entry *objects; static struct delta_entry *deltas; static int nr_objects; static int nr_deltas; -static void open_pack_file(void) +/* We always read in 4kB chunks. */ +static unsigned char input_buffer[4096]; +static unsigned long input_offset, input_len, consumed_bytes; +static SHA_CTX input_ctx; +static int input_fd; + +/* + * Make sure at least "min" bytes are available in the buffer, and + * return the pointer to the buffer. + */ +static void * fill(int min) { - int fd; - struct stat st; + if (min <= input_len) + return input_buffer + input_offset; + if (min > sizeof(input_buffer)) + die("cannot fill %d bytes", min); + if (input_offset) { + SHA1_Update(&input_ctx, input_buffer, input_offset); + memcpy(input_buffer, input_buffer + input_offset, input_len); + input_offset = 0; + } + do { + int ret = xread(input_fd, input_buffer + input_len, + sizeof(input_buffer) - input_len); + if (ret <= 0) { + if (!ret) + die("early EOF"); + die("read error on input: %s", strerror(errno)); + } + input_len += ret; + } while (input_len < min); + return input_buffer; +} + +static void use(int bytes) +{ + if (bytes > input_len) + die("used more bytes than were available"); + input_len -= bytes; + input_offset += bytes; + consumed_bytes += bytes; +} - fd = open(pack_name, O_RDONLY); - if (fd < 0) +static void open_pack_file(void) +{ + input_fd = open(pack_name, O_RDONLY); + if (input_fd < 0) die("cannot open packfile '%s': %s", pack_name, strerror(errno)); - if (fstat(fd, &st)) { - int err = errno; - close(fd); - die("cannot fstat packfile '%s': %s", pack_name, - strerror(err)); - } - pack_size = st.st_size; - pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (pack_base == MAP_FAILED) { - int err = errno; - close(fd); - die("cannot mmap packfile '%s': %s", pack_name, - strerror(err)); - } - close(fd); + SHA1_Init(&input_ctx); } static void parse_pack_header(void) { - const struct pack_header *hdr; - unsigned char sha1[20]; - SHA_CTX ctx; - - /* Ensure there are enough bytes for the header and final SHA1 */ - if (pack_size < sizeof(struct pack_header) + 20) - die("packfile '%s' is too small", pack_name); + struct pack_header *hdr = fill(sizeof(struct pack_header)); /* Header consistency check */ - hdr = (void *)pack_base; if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) die("packfile '%s' signature mismatch", pack_name); if (!pack_version_ok(hdr->hdr_version)) @@ -88,13 +107,8 @@ static void parse_pack_header(void) pack_name, ntohl(hdr->hdr_version)); nr_objects = ntohl(hdr->hdr_entries); - - /* Check packfile integrity */ - SHA1_Init(&ctx); - SHA1_Update(&ctx, pack_base, pack_size - 20); - SHA1_Final(sha1, &ctx); - if (hashcmp(sha1, pack_base + pack_size - 20)) - die("packfile '%s' SHA1 mismatch", pack_name); + use(sizeof(struct pack_header)); + /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/ } static void bad_object(unsigned long offset, const char *format, @@ -112,85 +126,78 @@ static void bad_object(unsigned long offset, const char *format, ...) pack_name, offset, buf); } -static void *unpack_entry_data(unsigned long offset, - unsigned long *current_pos, unsigned long size) +static void *unpack_entry_data(unsigned long offset, unsigned long size) { - unsigned long pack_limit = pack_size - 20; - unsigned long pos = *current_pos; z_stream stream; void *buf = xmalloc(size); memset(&stream, 0, sizeof(stream)); stream.next_out = buf; stream.avail_out = size; - stream.next_in = pack_base + pos; - stream.avail_in = pack_limit - pos; + stream.next_in = fill(1); + stream.avail_in = input_len; inflateInit(&stream); for (;;) { int ret = inflate(&stream, 0); - if (ret == Z_STREAM_END) + use(input_len - stream.avail_in); + if (stream.total_out == size && ret == Z_STREAM_END) break; if (ret != Z_OK) bad_object(offset, "inflate returned %d", ret); + stream.next_in = fill(1); + stream.avail_in = input_len; } inflateEnd(&stream); - if (stream.total_out != size) - bad_object(offset, "size mismatch (expected %lu, got %lu)", - size, stream.total_out); - *current_pos = pack_limit - stream.avail_in; return buf; } -static void *unpack_raw_entry(unsigned long offset, - enum object_type *obj_type, - unsigned long *obj_size, - union delta_base *delta_base, - unsigned long *next_obj_offset) +static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base) { - unsigned long pack_limit = pack_size - 20; - unsigned long pos = offset; - unsigned char c; + unsigned char *p, c; unsigned long size, base_offset; unsigned shift; - enum object_type type; - void *data; - c = pack_base[pos++]; - type = (c >> 4) & 7; + obj->offset = consumed_bytes; + + p = fill(1); + c = *p; + use(1); + obj->type = (c >> 4) & 7; size = (c & 15); shift = 4; while (c & 0x80) { - if (pos >= pack_limit) - bad_object(offset, "object extends past end of pack"); - c = pack_base[pos++]; + p = fill(1); + c = *p; + use(1); size += (c & 0x7fUL) << shift; shift += 7; } + obj->size = size; - switch (type) { + switch (obj->type) { case OBJ_REF_DELTA: - if (pos + 20 >= pack_limit) - bad_object(offset, "object extends past end of pack"); - hashcpy(delta_base->sha1, pack_base + pos); - pos += 20; + hashcpy(delta_base->sha1, fill(20)); + use(20); break; case OBJ_OFS_DELTA: memset(delta_base, 0, sizeof(*delta_base)); - c = pack_base[pos++]; + p = fill(1); + c = *p; + use(1); base_offset = c & 127; while (c & 128) { base_offset += 1; if (!base_offset || base_offset & ~(~0UL >> 7)) - bad_object(offset, "offset value overflow for delta base object"); - if (pos >= pack_limit) - bad_object(offset, "object extends past end of pack"); - c = pack_base[pos++]; + bad_object(obj->offset, "offset value overflow for delta base object"); + p = fill(1); + c = *p; + use(1); base_offset = (base_offset << 7) + (c & 127); } - delta_base->offset = offset - base_offset; - if (delta_base->offset >= offset) - bad_object(offset, "delta base offset is out of bound"); + delta_base->offset = obj->offset - base_offset; + if (delta_base->offset >= obj->offset) + bad_object(obj->offset, "delta base offset is out of bound"); break; case OBJ_COMMIT: case OBJ_TREE: @@ -198,13 +205,38 @@ static void *unpack_raw_entry(unsigned long offset, case OBJ_TAG: break; default: - bad_object(offset, "bad object type %d", type); + bad_object(obj->offset, "bad object type %d", obj->type); } + obj->hdr_size = consumed_bytes - obj->offset; + + return unpack_entry_data(obj->offset, obj->size); +} + +static void * get_data_from_pack(struct object_entry *obj) +{ + unsigned long from = obj[0].offset + obj[0].hdr_size; + unsigned long len = obj[1].offset - from; + unsigned pg_offset = from % getpagesize(); + unsigned char *map, *data; + z_stream stream; + int st; - data = unpack_entry_data(offset, &pos, size); - *obj_type = type; - *obj_size = size; - *next_obj_offset = pos; + map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE, + input_fd, from - pg_offset); + if (map == MAP_FAILED) + die("cannot mmap packfile '%s': %s", pack_name, strerror(errno)); + data = xmalloc(obj->size); + memset(&stream, 0, sizeof(stream)); + stream.next_out = data; + stream.avail_out = obj->size; + stream.next_in = map + pg_offset; + stream.avail_in = len; + inflateInit(&stream); + while ((st = inflate(&stream, Z_FINISH)) == Z_OK); + inflateEnd(&stream); + if (st != Z_STREAM_END || stream.total_out != obj->size) + die("serious inflate inconsistency"); + munmap(map, len + pg_offset); return data; } @@ -280,15 +312,12 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, unsigned long delta_size; void *result; unsigned long result_size; - enum object_type delta_type; union delta_base delta_base; - unsigned long next_obj_offset; int j, first, last; obj->real_type = type; - delta_data = unpack_raw_entry(obj->offset, &delta_type, - &delta_size, &delta_base, - &next_obj_offset); + delta_data = get_data_from_pack(obj); + delta_size = obj->size; result = patch_delta(base_data, base_size, delta_data, delta_size, &result_size); free(delta_data); @@ -321,13 +350,13 @@ static int compare_delta_entry(const void *a, const void *b) return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ); } -static void parse_pack_objects(void) +/* Parse all objects and return the pack content SHA1 hash */ +static void parse_pack_objects(unsigned char *sha1) { int i; - unsigned long offset = sizeof(struct pack_header); struct delta_entry *delta = deltas; void *data; - unsigned long data_size; + struct stat st; /* * First pass: @@ -337,19 +366,29 @@ static void parse_pack_objects(void) */ for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - obj->offset = offset; - data = unpack_raw_entry(offset, &obj->type, &data_size, - &delta->base, &offset); + data = unpack_raw_entry(obj, &delta->base); obj->real_type = obj->type; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { nr_deltas++; delta->obj = obj; delta++; } else - sha1_object(data, data_size, obj->type, obj->sha1); + sha1_object(data, obj->size, obj->type, obj->sha1); free(data); } - if (offset != pack_size - 20) + objects[i].offset = consumed_bytes; + + /* Check pack integrity */ + SHA1_Update(&input_ctx, input_buffer, input_offset); + SHA1_Final(sha1, &input_ctx); + if (hashcmp(fill(20), sha1)) + die("packfile '%s' SHA1 mismatch", pack_name); + use(20); + + /* If input_fd is a file, we should have reached its end now. */ + if (fstat(input_fd, &st)) + die("cannot fstat packfile '%s': %s", pack_name, strerror(errno)); + if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes) die("packfile '%s' has junk at the end", pack_name); /* Sort deltas by base SHA1/offset for fast searching */ @@ -378,18 +417,17 @@ static void parse_pack_objects(void) ofs = !find_delta_childs(&base, &ofs_first, &ofs_last); if (!ref && !ofs) continue; - data = unpack_raw_entry(obj->offset, &obj->type, &data_size, - &base, &offset); + data = get_data_from_pack(obj); if (ref) for (j = ref_first; j <= ref_last; j++) if (deltas[j].obj->type == OBJ_REF_DELTA) resolve_delta(&deltas[j], data, - data_size, obj->type); + obj->size, obj->type); if (ofs) for (j = ofs_first; j <= ofs_last; j++) if (deltas[j].obj->type == OBJ_OFS_DELTA) resolve_delta(&deltas[j], data, - data_size, obj->type); + obj->size, obj->type); free(data); } @@ -408,6 +446,10 @@ static int sha1_compare(const void *_a, const void *_b) return hashcmp(a->sha1, b->sha1); } +/* + * On entry *sha1 contains the pack content SHA1 hash, on exit it is + * the SHA1 hash of sorted object names. + */ static void write_index_file(const char *index_name, unsigned char *sha1) { struct sha1file *f; @@ -467,7 +509,7 @@ static void write_index_file(const char *index_name, unsigned char *sha1) sha1write(f, obj->sha1, 20); SHA1_Update(&ctx, obj->sha1, 20); } - sha1write(f, pack_base + pack_size - 20, 20); + sha1write(f, sha1, 20); sha1close(f, NULL, 1); free(sorted_by_sha); SHA1_Final(sha1, &ctx); @@ -513,9 +555,9 @@ int main(int argc, char **argv) open_pack_file(); parse_pack_header(); - objects = xcalloc(nr_objects, sizeof(struct object_entry)); + objects = xcalloc(nr_objects + 1, sizeof(struct object_entry)); deltas = xcalloc(nr_objects, sizeof(struct delta_entry)); - parse_pack_objects(); + parse_pack_objects(sha1); free(deltas); write_index_file(index_name, sha1); free(objects); -- cgit v1.2.1