chunk-format: parse trailing table of contents

The new read_trailing_table_of_contents() mimics read_table_of_contents() except that it reads the table of contents in reverse from the end of the given hashfile. The file is given as a memory-mapped section of memory and a size. Automatically calculate the start of the trailing hash and read the table of contents in revers from that position. The errors come along from those in read_table_of_contents(). The one exception is that the chunk_offset cannot be checked as going into the table of contents since we do not have that length automatically. That may have some surprising results for some narrow forms of corruption. However, we do still limit the size to the size of the file plus the part of the table of contents read so far. At minimum, the given sizes can be used to limit parsing within the file itself. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Taylor Blau <me@ttaylorr.com>
author: Derrick Stolee <derrickstolee@github.com> 2022-11-07 18:35:45 +0000
committer: Taylor Blau <me@ttaylorr.com> 2022-11-07 13:53:52 -0500
commit: 953d2001fa47ba5d0f8882fe219089a7c488d57b (patch)
tree: da565e8f40b9a2285d9ff5d2ef26e5d822c34ec8
parent: 7056e7f0c8e55d5c3fae5648d5434d83436e8977 (diff)
download: git-953d2001fa47ba5d0f8882fe219089a7c488d57b.tar.gz
2 files changed, 62 insertions, 0 deletions
diff --git a/chunk-format.c b/chunk-format.c
index 3f5cc9b5dd..e836a121c5 100644
--- a/chunk-format.c
+++ b/chunk-format.c
@@ -173,6 +173,59 @@ int read_table_of_contents(struct chunkfile *cf,
 	return 0;
 }
 
+int read_trailing_table_of_contents(struct chunkfile *cf,
+				    const unsigned char *mfile,
+				    size_t mfile_size)
+{
+	int i;
+	uint32_t chunk_id;
+	const unsigned char *table_of_contents = mfile + mfile_size - the_hash_algo->rawsz;
+
+	while (1) {
+		uint64_t chunk_offset;
+
+		table_of_contents -= CHUNK_TOC_ENTRY_SIZE;
+
+		chunk_id = get_be32(table_of_contents);
+		chunk_offset = get_be64(table_of_contents + 4);
+
+		/* Calculate the previous chunk size, if it exists. */
+		if (cf->chunks_nr) {
+			off_t previous_offset = cf->chunks[cf->chunks_nr - 1].offset;
+
+			if (chunk_offset < previous_offset ||
+			    chunk_offset > table_of_contents - mfile) {
+				error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
+				previous_offset, chunk_offset);
+				return -1;
+			}
+
+			cf->chunks[cf->chunks_nr - 1].size = chunk_offset - previous_offset;
+		}
+
+		/* Stop at the null chunk. We only need it for the last size. */
+		if (!chunk_id)
+			break;
+
+		for (i = 0; i < cf->chunks_nr; i++) {
+			if (cf->chunks[i].id == chunk_id) {
+				error(_("duplicate chunk ID %"PRIx32" found"),
+					chunk_id);
+				return -1;
+			}
+		}
+
+		ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);
+
+		cf->chunks[cf->chunks_nr].id = chunk_id;
+		cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
+		cf->chunks[cf->chunks_nr].offset = chunk_offset;
+		cf->chunks_nr++;
+	}
+
+	return 0;
+}
+
 static int pair_chunk_fn(const unsigned char *chunk_start,
 			 size_t chunk_size,
 			 void *data)
diff --git a/chunk-format.h b/chunk-format.h
index 39e8967e95..acb8dfbce8 100644
--- a/chunk-format.h
+++ b/chunk-format.h
@@ -46,6 +46,15 @@ int read_table_of_contents(struct chunkfile *cf,
 			   uint64_t toc_offset,
 			   int toc_length);
 
+/**
+ * Read the given chunkfile, but read the table of contents from the
+ * end of the given mfile. The file is expected to be a hashfile with
+ * the_hash_file->rawsz bytes at the end storing the hash.
+ */
+int read_trailing_table_of_contents(struct chunkfile *cf,
+				    const unsigned char *mfile,
+				    size_t mfile_size);
+
 #define CHUNK_NOT_FOUND (-2)
 
 /*
author	Derrick Stolee <derrickstolee@github.com>	2022-11-07 18:35:45 +0000
committer	Taylor Blau <me@ttaylorr.com>	2022-11-07 13:53:52 -0500
commit	953d2001fa47ba5d0f8882fe219089a7c488d57b (patch)
tree	da565e8f40b9a2285d9ff5d2ef26e5d822c34ec8
parent	7056e7f0c8e55d5c3fae5648d5434d83436e8977 (diff)
download	git-953d2001fa47ba5d0f8882fe219089a7c488d57b.tar.gz