From eb32d236df0c16b936b04f0c5402addb61cdb311 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 21 Sep 2006 00:06:49 -0400
Subject: introduce delta objects with offset to base

This adds a new object, namely OBJ_OFS_DELTA, renames OBJ_DELTA to
OBJ_REF_DELTA to better make the distinction between those two delta
objects, and adds support for the handling of those new delta objects
in sha1_file.c only.

The OBJ_OFS_DELTA contains a relative offset from the delta object's
position in a pack instead of the 20-byte SHA1 reference to identify
the base object.  Since the base is likely not to be far away, the
relative offset is more likely to have a smaller encoding on average
than an absolute offset.  And for those delta objects the base must
always be stored first because there is no way to know the distance of
later objects when streaming a pack.  Hence this relative offset is
always meant to be negative.

The offset encoding is slightly denser than the one used for object
size -- credits to <linux@horizon.com> (whoever this is) for bringing
it to my attention.

This allows for a pack size reduction ranging from 3.2% (Linux-2.6) to
over 5% (linux-historic).  Runtime pack access should be faster too,
since delta replay skips a search in the pack index for each delta in a
chain.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 index-pack.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'index-pack.c')

diff --git a/index-pack.c b/index-pack.c
index 80bc6cb45b..aef7f0a32e 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -158,7 +158,7 @@ static void *unpack_raw_entry(unsigned long offset,
 	}
 
 	switch (type) {
-	case OBJ_DELTA:
+	case OBJ_REF_DELTA:
 		if (pos + 20 >= pack_limit)
 			bad_object(offset, "object extends past end of pack");
 		hashcpy(delta_base, pack_base + pos);
@@ -301,7 +301,7 @@ static void parse_pack_objects(void)
 		data = unpack_raw_entry(offset, &obj->type, &data_size,
 					base_sha1, &offset);
 		obj->real_type = obj->type;
-		if (obj->type == OBJ_DELTA) {
+		if (obj->type == OBJ_REF_DELTA) {
 			struct delta_entry *delta = &deltas[nr_deltas++];
 			delta->obj = obj;
 			hashcpy(delta->base_sha1, base_sha1);
@@ -328,7 +328,7 @@ static void parse_pack_objects(void)
 		struct object_entry *obj = &objects[i];
 		int j, first, last;
 
-		if (obj->type == OBJ_DELTA)
+		if (obj->type == OBJ_REF_DELTA)
 			continue;
 		if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
 			continue;
@@ -341,7 +341,7 @@ static void parse_pack_objects(void)
 
 	/* Check for unresolved deltas */
 	for (i = 0; i < nr_deltas; i++) {
-		if (deltas[i].obj->real_type == OBJ_DELTA)
+		if (deltas[i].obj->real_type == OBJ_REF_DELTA)
 			die("packfile '%s' has unresolved deltas",  pack_name);
 	}
 }
-- 
cgit v1.2.1


From 53dda6ff6263a3f350514d9edae600468c946ed4 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 21 Sep 2006 00:08:33 -0400
Subject: teach git-index-pack about deltas with offset to base

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 index-pack.c | 111 ++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 80 insertions(+), 31 deletions(-)

(limited to 'index-pack.c')

diff --git a/index-pack.c b/index-pack.c
index aef7f0a32e..fffddd25c9 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -18,10 +18,15 @@ struct object_entry
 	unsigned char sha1[20];
 };
 
+union delta_base {
+	unsigned char sha1[20];
+	unsigned long offset;
+};
+
 struct delta_entry
 {
 	struct object_entry *obj;
-	unsigned char base_sha1[20];
+	union delta_base base;
 };
 
 static const char *pack_name;
@@ -134,13 +139,13 @@ static void *unpack_entry_data(unsigned long offset,
 static void *unpack_raw_entry(unsigned long offset,
 			      enum object_type *obj_type,
 			      unsigned long *obj_size,
-			      unsigned char *delta_base,
+			      union delta_base *delta_base,
 			      unsigned long *next_obj_offset)
 {
 	unsigned long pack_limit = pack_size - 20;
 	unsigned long pos = offset;
 	unsigned char c;
-	unsigned long size;
+	unsigned long size, base_offset;
 	unsigned shift;
 	enum object_type type;
 	void *data;
@@ -161,26 +166,43 @@ static void *unpack_raw_entry(unsigned long offset,
 	case OBJ_REF_DELTA:
 		if (pos + 20 >= pack_limit)
 			bad_object(offset, "object extends past end of pack");
-		hashcpy(delta_base, pack_base + pos);
+		hashcpy(delta_base->sha1, pack_base + pos);
 		pos += 20;
-		/* fallthru */
+		break;
+	case OBJ_OFS_DELTA:
+		memset(delta_base, 0, sizeof(*delta_base));
+		c = pack_base[pos++];
+		base_offset = c & 127;
+		while (c & 128) {
+			base_offset += 1;
+			if (!base_offset || base_offset & ~(~0UL >> 7))
+				bad_object(offset, "offset value overflow for delta base object");
+			if (pos >= pack_limit)
+				bad_object(offset, "object extends past end of pack");
+			c = pack_base[pos++];
+			base_offset = (base_offset << 7) + (c & 127);
+		}
+		delta_base->offset = offset - base_offset;
+		if (delta_base->offset >= offset)
+			bad_object(offset, "delta base offset is out of bound");
+		break;
 	case OBJ_COMMIT:
 	case OBJ_TREE:
 	case OBJ_BLOB:
 	case OBJ_TAG:
-		data = unpack_entry_data(offset, &pos, size);
 		break;
 	default:
 		bad_object(offset, "bad object type %d", type);
 	}
 
+	data = unpack_entry_data(offset, &pos, size);
 	*obj_type = type;
 	*obj_size = size;
 	*next_obj_offset = pos;
 	return data;
 }
 
-static int find_delta(const unsigned char *base_sha1)
+static int find_delta(const union delta_base *base)
 {
 	int first = 0, last = nr_deltas;
 
@@ -189,7 +211,7 @@ static int find_delta(const unsigned char *base_sha1)
                 struct delta_entry *delta = &deltas[next];
                 int cmp;
 
-                cmp = hashcmp(base_sha1, delta->base_sha1);
+                cmp = memcmp(base, &delta->base, sizeof(*base));
                 if (!cmp)
                         return next;
                 if (cmp < 0) {
@@ -201,18 +223,18 @@ static int find_delta(const unsigned char *base_sha1)
         return -first-1;
 }
 
-static int find_deltas_based_on_sha1(const unsigned char *base_sha1,
-				     int *first_index, int *last_index)
+static int find_delta_childs(const union delta_base *base,
+			     int *first_index, int *last_index)
 {
-	int first = find_delta(base_sha1);
+	int first = find_delta(base);
 	int last = first;
 	int end = nr_deltas - 1;
 
 	if (first < 0)
 		return -1;
-	while (first > 0 && !hashcmp(deltas[first - 1].base_sha1, base_sha1))
+	while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base)))
 		--first;
-	while (last < end && !hashcmp(deltas[last + 1].base_sha1, base_sha1))
+	while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base)))
 		++last;
 	*first_index = first;
 	*last_index = last;
@@ -253,13 +275,13 @@ static void resolve_delta(struct delta_entry *delta, void *base_data,
 	void *result;
 	unsigned long result_size;
 	enum object_type delta_type;
-	unsigned char base_sha1[20];
+	union delta_base delta_base;
 	unsigned long next_obj_offset;
 	int j, first, last;
 
 	obj->real_type = type;
 	delta_data = unpack_raw_entry(obj->offset, &delta_type,
-				      &delta_size, base_sha1,
+				      &delta_size, &delta_base,
 				      &next_obj_offset);
 	result = patch_delta(base_data, base_size, delta_data, delta_size,
 			     &result_size);
@@ -267,10 +289,22 @@ static void resolve_delta(struct delta_entry *delta, void *base_data,
 	if (!result)
 		bad_object(obj->offset, "failed to apply delta");
 	sha1_object(result, result_size, type, obj->sha1);
-	if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) {
+
+	hashcpy(delta_base.sha1, obj->sha1);
+	if (!find_delta_childs(&delta_base, &first, &last)) {
 		for (j = first; j <= last; j++)
-			resolve_delta(&deltas[j], result, result_size, type);
+			if (deltas[j].obj->type == OBJ_REF_DELTA)
+				resolve_delta(&deltas[j], result, result_size, type);
 	}
+
+	memset(&delta_base, 0, sizeof(delta_base));
+	delta_base.offset = obj->offset;
+	if (!find_delta_childs(&delta_base, &first, &last)) {
+		for (j = first; j <= last; j++)
+			if (deltas[j].obj->type == OBJ_OFS_DELTA)
+				resolve_delta(&deltas[j], result, result_size, type);
+	}
+
 	free(result);
 }
 
@@ -278,14 +312,14 @@ static int compare_delta_entry(const void *a, const void *b)
 {
 	const struct delta_entry *delta_a = a;
 	const struct delta_entry *delta_b = b;
-	return hashcmp(delta_a->base_sha1, delta_b->base_sha1);
+	return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base));
 }
 
 static void parse_pack_objects(void)
 {
 	int i;
 	unsigned long offset = sizeof(struct pack_header);
-	unsigned char base_sha1[20];
+	struct delta_entry *delta = deltas;
 	void *data;
 	unsigned long data_size;
 
@@ -299,12 +333,12 @@ static void parse_pack_objects(void)
 		struct object_entry *obj = &objects[i];
 		obj->offset = offset;
 		data = unpack_raw_entry(offset, &obj->type, &data_size,
-					base_sha1, &offset);
+					&delta->base, &offset);
 		obj->real_type = obj->type;
-		if (obj->type == OBJ_REF_DELTA) {
-			struct delta_entry *delta = &deltas[nr_deltas++];
+		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
+			nr_deltas++;
 			delta->obj = obj;
-			hashcpy(delta->base_sha1, base_sha1);
+			delta++;
 		} else
 			sha1_object(data, data_size, obj->type, obj->sha1);
 		free(data);
@@ -312,7 +346,7 @@ static void parse_pack_objects(void)
 	if (offset != pack_size - 20)
 		die("packfile '%s' has junk at the end", pack_name);
 
-	/* Sort deltas by base SHA1 for fast searching */
+	/* Sort deltas by base SHA1/offset for fast searching */
 	qsort(deltas, nr_deltas, sizeof(struct delta_entry),
 	      compare_delta_entry);
 
@@ -326,22 +360,37 @@ static void parse_pack_objects(void)
 	 */
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
-		int j, first, last;
+		union delta_base base;
+		int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
 
-		if (obj->type == OBJ_REF_DELTA)
+		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
 			continue;
-		if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
+		hashcpy(base.sha1, obj->sha1);
+		ref = !find_delta_childs(&base, &ref_first, &ref_last);
+		memset(&base, 0, sizeof(base));
+		base.offset = obj->offset;
+		ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
+		if (!ref && !ofs)
 			continue;
 		data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
-					base_sha1, &offset);
-		for (j = first; j <= last; j++)
-			resolve_delta(&deltas[j], data, data_size, obj->type);
+					&base, &offset);
+		if (ref)
+			for (j = ref_first; j <= ref_last; j++)
+				if (deltas[j].obj->type == OBJ_REF_DELTA)
+					resolve_delta(&deltas[j], data,
+						      data_size, obj->type);
+		if (ofs)
+			for (j = ofs_first; j <= ofs_last; j++)
+				if (deltas[j].obj->type == OBJ_OFS_DELTA)
+					resolve_delta(&deltas[j], data,
+						      data_size, obj->type);
 		free(data);
 	}
 
 	/* Check for unresolved deltas */
 	for (i = 0; i < nr_deltas; i++) {
-		if (deltas[i].obj->real_type == OBJ_REF_DELTA)
+		if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
+		    deltas[i].obj->real_type == OBJ_OFS_DELTA)
 			die("packfile '%s' has unresolved deltas",  pack_name);
 	}
 }
-- 
cgit v1.2.1


From 3c552873c698117689af4e5159c7e491fe3a89a3 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 17 Oct 2006 16:23:26 -0400
Subject: index-pack: compare only the first 20-bytes of the key.

The "union delta_base" is a strange beast.  It is a 20-byte
binary blob key to search a binary searchable deltas[] array,
each element of which uses it to represent its base object with
either a full 20-byte SHA-1 or an offset in the pack.  Which
representation is used is determined by another field of the
deltas[] array element, obj->type, so there is no room for
confusion, as long as we make sure we compare the keys for the
same type only with appropriate length.  The code compared the
full union with memcmp().

When storing the in-pack offset, the union was first cleared
before storing an unsigned long, so comparison worked fine.

On 64-bit architectures, however, the union is typically 24 bytes
long; the code did not clear the remaining 4-byte alignment
padding when storing a full 20-byte SHA-1 representation.  Using
memcmp() to compare the whole union was wrong.

This fixes the comparison to look at the first 20 bytes of the
union, regardless of the architecture.  As long as ulong is
smaller than 20 bytes this works fine.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 index-pack.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'index-pack.c')

diff --git a/index-pack.c b/index-pack.c
index fffddd25c9..56c590e3fa 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -23,6 +23,12 @@ union delta_base {
 	unsigned long offset;
 };
 
+/*
+ * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
+ * to memcmp() only the first 20 bytes.
+ */
+#define UNION_BASE_SZ	20
+
 struct delta_entry
 {
 	struct object_entry *obj;
@@ -211,7 +217,7 @@ static int find_delta(const union delta_base *base)
                 struct delta_entry *delta = &deltas[next];
                 int cmp;
 
-                cmp = memcmp(base, &delta->base, sizeof(*base));
+                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
                 if (!cmp)
                         return next;
                 if (cmp < 0) {
@@ -232,9 +238,9 @@ static int find_delta_childs(const union delta_base *base,
 
 	if (first < 0)
 		return -1;
-	while (first > 0 && !memcmp(&deltas[first - 1].base, base, sizeof(*base)))
+	while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
 		--first;
-	while (last < end && !memcmp(&deltas[last + 1].base, base, sizeof(*base)))
+	while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
 		++last;
 	*first_index = first;
 	*last_index = last;
@@ -312,7 +318,7 @@ static int compare_delta_entry(const void *a, const void *b)
 {
 	const struct delta_entry *delta_a = a;
 	const struct delta_entry *delta_b = b;
-	return memcmp(&delta_a->base, &delta_b->base, sizeof(union delta_base));
+	return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
 }
 
 static void parse_pack_objects(void)
-- 
cgit v1.2.1


From 2d477051ef260aad352d63fc7d9c07e4ebb4359b Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 20 Oct 2006 14:45:21 -0400
Subject: add the capability for index-pack to read from a stream

This patch only adds the streaming capability to index-pack.  Although
the code is different it has the exact same functionality as before to
make sure nothing broke.

This is in preparation for receiving packs over the net, parsing them on
the fly, fixing them up if they are "thin" packs, and keeping the
resulting pack instead of exploding it into loose objects.  But such
functionality should come separately.

One immediate advantage of this patch is that index-pack can now deal
with packs up to 4GB in size even on 32-bit architectures since the pack
is not entirely mmap()'d all at once anymore.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 index-pack.c | 244 ++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 143 insertions(+), 101 deletions(-)

(limited to 'index-pack.c')

diff --git a/index-pack.c b/index-pack.c
index 56c590e3fa..e33f60524f 100644
--- a/index-pack.c
+++ b/index-pack.c
@@ -13,6 +13,8 @@ static const char index_pack_usage[] =
 struct object_entry
 {
 	unsigned long offset;
+	unsigned long size;
+	unsigned int hdr_size;
 	enum object_type type;
 	enum object_type real_type;
 	unsigned char sha1[20];
@@ -36,51 +38,68 @@ struct delta_entry
 };
 
 static const char *pack_name;
-static unsigned char *pack_base;
-static unsigned long pack_size;
 static struct object_entry *objects;
 static struct delta_entry *deltas;
 static int nr_objects;
 static int nr_deltas;
 
-static void open_pack_file(void)
+/* We always read in 4kB chunks. */
+static unsigned char input_buffer[4096];
+static unsigned long input_offset, input_len, consumed_bytes;
+static SHA_CTX input_ctx;
+static int input_fd;
+
+/*
+ * Make sure at least "min" bytes are available in the buffer, and
+ * return the pointer to the buffer.
+ */
+static void * fill(int min)
 {
-	int fd;
-	struct stat st;
+	if (min <= input_len)
+		return input_buffer + input_offset;
+	if (min > sizeof(input_buffer))
+		die("cannot fill %d bytes", min);
+	if (input_offset) {
+		SHA1_Update(&input_ctx, input_buffer, input_offset);
+		memcpy(input_buffer, input_buffer + input_offset, input_len);
+		input_offset = 0;
+	}
+	do {
+		int ret = xread(input_fd, input_buffer + input_len,
+				sizeof(input_buffer) - input_len);
+		if (ret <= 0) {
+			if (!ret)
+				die("early EOF");
+			die("read error on input: %s", strerror(errno));
+		}
+		input_len += ret;
+	} while (input_len < min);
+	return input_buffer;
+}
+
+static void use(int bytes)
+{
+	if (bytes > input_len)
+		die("used more bytes than were available");
+	input_len -= bytes;
+	input_offset += bytes;
+	consumed_bytes += bytes;
+}
 
-	fd = open(pack_name, O_RDONLY);
-	if (fd < 0)
+static void open_pack_file(void)
+{
+	input_fd = open(pack_name, O_RDONLY);
+	if (input_fd < 0)
 		die("cannot open packfile '%s': %s", pack_name,
 		    strerror(errno));
-	if (fstat(fd, &st)) {
-		int err = errno;
-		close(fd);
-		die("cannot fstat packfile '%s': %s", pack_name,
-		    strerror(err));
-	}
-	pack_size = st.st_size;
-	pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
-	if (pack_base == MAP_FAILED) {
-		int err = errno;
-		close(fd);
-		die("cannot mmap packfile '%s': %s", pack_name,
-		    strerror(err));
-	}
-	close(fd);
+	SHA1_Init(&input_ctx);
 }
 
 static void parse_pack_header(void)
 {
-	const struct pack_header *hdr;
-	unsigned char sha1[20];
-	SHA_CTX ctx;
-
-	/* Ensure there are enough bytes for the header and final SHA1 */
-	if (pack_size < sizeof(struct pack_header) + 20)
-		die("packfile '%s' is too small", pack_name);
+	struct pack_header *hdr = fill(sizeof(struct pack_header));
 
 	/* Header consistency check */
-	hdr = (void *)pack_base;
 	if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
 		die("packfile '%s' signature mismatch", pack_name);
 	if (!pack_version_ok(hdr->hdr_version))
@@ -88,13 +107,8 @@ static void parse_pack_header(void)
 		    pack_name, ntohl(hdr->hdr_version));
 
 	nr_objects = ntohl(hdr->hdr_entries);
-
-	/* Check packfile integrity */
-	SHA1_Init(&ctx);
-	SHA1_Update(&ctx, pack_base, pack_size - 20);
-	SHA1_Final(sha1, &ctx);
-	if (hashcmp(sha1, pack_base + pack_size - 20))
-		die("packfile '%s' SHA1 mismatch", pack_name);
+	use(sizeof(struct pack_header));
+	/*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
 }
 
 static void bad_object(unsigned long offset, const char *format,
@@ -112,85 +126,78 @@ static void bad_object(unsigned long offset, const char *format, ...)
 	    pack_name, offset, buf);
 }
 
-static void *unpack_entry_data(unsigned long offset,
-			       unsigned long *current_pos, unsigned long size)
+static void *unpack_entry_data(unsigned long offset, unsigned long size)
 {
-	unsigned long pack_limit = pack_size - 20;
-	unsigned long pos = *current_pos;
 	z_stream stream;
 	void *buf = xmalloc(size);
 
 	memset(&stream, 0, sizeof(stream));
 	stream.next_out = buf;
 	stream.avail_out = size;
-	stream.next_in = pack_base + pos;
-	stream.avail_in = pack_limit - pos;
+	stream.next_in = fill(1);
+	stream.avail_in = input_len;
 	inflateInit(&stream);
 
 	for (;;) {
 		int ret = inflate(&stream, 0);
-		if (ret == Z_STREAM_END)
+		use(input_len - stream.avail_in);
+		if (stream.total_out == size && ret == Z_STREAM_END)
 			break;
 		if (ret != Z_OK)
 			bad_object(offset, "inflate returned %d", ret);
+		stream.next_in = fill(1);
+		stream.avail_in = input_len;
 	}
 	inflateEnd(&stream);
-	if (stream.total_out != size)
-		bad_object(offset, "size mismatch (expected %lu, got %lu)",
-			   size, stream.total_out);
-	*current_pos = pack_limit - stream.avail_in;
 	return buf;
 }
 
-static void *unpack_raw_entry(unsigned long offset,
-			      enum object_type *obj_type,
-			      unsigned long *obj_size,
-			      union delta_base *delta_base,
-			      unsigned long *next_obj_offset)
+static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
 {
-	unsigned long pack_limit = pack_size - 20;
-	unsigned long pos = offset;
-	unsigned char c;
+	unsigned char *p, c;
 	unsigned long size, base_offset;
 	unsigned shift;
-	enum object_type type;
-	void *data;
 
-	c = pack_base[pos++];
-	type = (c >> 4) & 7;
+	obj->offset = consumed_bytes;
+
+	p = fill(1);
+	c = *p;
+	use(1);
+	obj->type = (c >> 4) & 7;
 	size = (c & 15);
 	shift = 4;
 	while (c & 0x80) {
-		if (pos >= pack_limit)
-			bad_object(offset, "object extends past end of pack");
-		c = pack_base[pos++];
+		p = fill(1);
+		c = *p;
+		use(1);
 		size += (c & 0x7fUL) << shift;
 		shift += 7;
 	}
+	obj->size = size;
 
-	switch (type) {
+	switch (obj->type) {
 	case OBJ_REF_DELTA:
-		if (pos + 20 >= pack_limit)
-			bad_object(offset, "object extends past end of pack");
-		hashcpy(delta_base->sha1, pack_base + pos);
-		pos += 20;
+		hashcpy(delta_base->sha1, fill(20));
+		use(20);
 		break;
 	case OBJ_OFS_DELTA:
 		memset(delta_base, 0, sizeof(*delta_base));
-		c = pack_base[pos++];
+		p = fill(1);
+		c = *p;
+		use(1);
 		base_offset = c & 127;
 		while (c & 128) {
 			base_offset += 1;
 			if (!base_offset || base_offset & ~(~0UL >> 7))
-				bad_object(offset, "offset value overflow for delta base object");
-			if (pos >= pack_limit)
-				bad_object(offset, "object extends past end of pack");
-			c = pack_base[pos++];
+				bad_object(obj->offset, "offset value overflow for delta base object");
+			p = fill(1);
+			c = *p;
+			use(1);
 			base_offset = (base_offset << 7) + (c & 127);
 		}
-		delta_base->offset = offset - base_offset;
-		if (delta_base->offset >= offset)
-			bad_object(offset, "delta base offset is out of bound");
+		delta_base->offset = obj->offset - base_offset;
+		if (delta_base->offset >= obj->offset)
+			bad_object(obj->offset, "delta base offset is out of bound");
 		break;
 	case OBJ_COMMIT:
 	case OBJ_TREE:
@@ -198,13 +205,38 @@ static void *unpack_raw_entry(unsigned long offset,
 	case OBJ_TAG:
 		break;
 	default:
-		bad_object(offset, "bad object type %d", type);
+		bad_object(obj->offset, "bad object type %d", obj->type);
 	}
+	obj->hdr_size = consumed_bytes - obj->offset;
+
+	return unpack_entry_data(obj->offset, obj->size);
+}
+
+static void * get_data_from_pack(struct object_entry *obj)
+{
+	unsigned long from = obj[0].offset + obj[0].hdr_size;
+	unsigned long len = obj[1].offset - from;
+	unsigned pg_offset = from % getpagesize();
+	unsigned char *map, *data;
+	z_stream stream;
+	int st;
 
-	data = unpack_entry_data(offset, &pos, size);
-	*obj_type = type;
-	*obj_size = size;
-	*next_obj_offset = pos;
+	map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE,
+		   input_fd, from - pg_offset);
+	if (map == MAP_FAILED)
+		die("cannot mmap packfile '%s': %s", pack_name, strerror(errno));
+	data = xmalloc(obj->size);
+	memset(&stream, 0, sizeof(stream));
+	stream.next_out = data;
+	stream.avail_out = obj->size;
+	stream.next_in = map + pg_offset;
+	stream.avail_in = len;
+	inflateInit(&stream);
+	while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
+	inflateEnd(&stream);
+	if (st != Z_STREAM_END || stream.total_out != obj->size)
+		die("serious inflate inconsistency");
+	munmap(map, len + pg_offset);
 	return data;
 }
 
@@ -280,15 +312,12 @@ static void resolve_delta(struct delta_entry *delta, void *base_data,
 	unsigned long delta_size;
 	void *result;
 	unsigned long result_size;
-	enum object_type delta_type;
 	union delta_base delta_base;
-	unsigned long next_obj_offset;
 	int j, first, last;
 
 	obj->real_type = type;
-	delta_data = unpack_raw_entry(obj->offset, &delta_type,
-				      &delta_size, &delta_base,
-				      &next_obj_offset);
+	delta_data = get_data_from_pack(obj);
+	delta_size = obj->size;
 	result = patch_delta(base_data, base_size, delta_data, delta_size,
 			     &result_size);
 	free(delta_data);
@@ -321,13 +350,13 @@ static int compare_delta_entry(const void *a, const void *b)
 	return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
 }
 
-static void parse_pack_objects(void)
+/* Parse all objects and return the pack content SHA1 hash */
+static void parse_pack_objects(unsigned char *sha1)
 {
 	int i;
-	unsigned long offset = sizeof(struct pack_header);
 	struct delta_entry *delta = deltas;
 	void *data;
-	unsigned long data_size;
+	struct stat st;
 
 	/*
 	 * First pass:
@@ -337,19 +366,29 @@ static void parse_pack_objects(void)
 	 */
 	for (i = 0; i < nr_objects; i++) {
 		struct object_entry *obj = &objects[i];
-		obj->offset = offset;
-		data = unpack_raw_entry(offset, &obj->type, &data_size,
-					&delta->base, &offset);
+		data = unpack_raw_entry(obj, &delta->base);
 		obj->real_type = obj->type;
 		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
 			nr_deltas++;
 			delta->obj = obj;
 			delta++;
 		} else
-			sha1_object(data, data_size, obj->type, obj->sha1);
+			sha1_object(data, obj->size, obj->type, obj->sha1);
 		free(data);
 	}
-	if (offset != pack_size - 20)
+	objects[i].offset = consumed_bytes;
+
+	/* Check pack integrity */
+	SHA1_Update(&input_ctx, input_buffer, input_offset);
+	SHA1_Final(sha1, &input_ctx);
+	if (hashcmp(fill(20), sha1))
+		die("packfile '%s' SHA1 mismatch", pack_name);
+	use(20);
+
+	/* If input_fd is a file, we should have reached its end now. */
+	if (fstat(input_fd, &st))
+		die("cannot fstat packfile '%s': %s", pack_name, strerror(errno));
+	if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes)
 		die("packfile '%s' has junk at the end", pack_name);
 
 	/* Sort deltas by base SHA1/offset for fast searching */
@@ -378,18 +417,17 @@ static void parse_pack_objects(void)
 		ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
 		if (!ref && !ofs)
 			continue;
-		data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
-					&base, &offset);
+		data = get_data_from_pack(obj);
 		if (ref)
 			for (j = ref_first; j <= ref_last; j++)
 				if (deltas[j].obj->type == OBJ_REF_DELTA)
 					resolve_delta(&deltas[j], data,
-						      data_size, obj->type);
+						      obj->size, obj->type);
 		if (ofs)
 			for (j = ofs_first; j <= ofs_last; j++)
 				if (deltas[j].obj->type == OBJ_OFS_DELTA)
 					resolve_delta(&deltas[j], data,
-						      data_size, obj->type);
+						      obj->size, obj->type);
 		free(data);
 	}
 
@@ -408,6 +446,10 @@ static int sha1_compare(const void *_a, const void *_b)
 	return hashcmp(a->sha1, b->sha1);
 }
 
+/*
+ * On entry *sha1 contains the pack content SHA1 hash, on exit it is
+ * the SHA1 hash of sorted object names.
+ */
 static void write_index_file(const char *index_name, unsigned char *sha1)
 {
 	struct sha1file *f;
@@ -467,7 +509,7 @@ static void write_index_file(const char *index_name, unsigned char *sha1)
 		sha1write(f, obj->sha1, 20);
 		SHA1_Update(&ctx, obj->sha1, 20);
 	}
-	sha1write(f, pack_base + pack_size - 20, 20);
+	sha1write(f, sha1, 20);
 	sha1close(f, NULL, 1);
 	free(sorted_by_sha);
 	SHA1_Final(sha1, &ctx);
@@ -513,9 +555,9 @@ int main(int argc, char **argv)
 
 	open_pack_file();
 	parse_pack_header();
-	objects = xcalloc(nr_objects, sizeof(struct object_entry));
+	objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
 	deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
-	parse_pack_objects();
+	parse_pack_objects(sha1);
 	free(deltas);
 	write_index_file(index_name, sha1);
 	free(objects);
-- 
cgit v1.2.1