diff options
author | lhchavez <lhchavez@lhchavez.com> | 2021-01-04 18:22:43 -0800 |
---|---|---|
committer | lhchavez <lhchavez@lhchavez.com> | 2021-01-10 11:18:38 -0800 |
commit | 3fd57a75e9dd0c4b0e40ee6e21568d40bd70d29b (patch) | |
tree | 7ba89ed7cea693a3a48c6ab509608ab5175aab76 /src | |
parent | f0d7922c9bafff38e12978625f467aafebe75145 (diff) | |
download | libgit2-3fd57a75e9dd0c4b0e40ee6e21568d40bd70d29b.tar.gz |
commit-graph: Introduce a parser for commit-graph files
This change is the first in a series to add support for git's
commit-graph. This should speed up commit graph traversals by avoiding
object parsing and allowing some operations to terminate earlier.
Part of: #5757
Diffstat (limited to 'src')
-rw-r--r-- | src/commit_graph.c | 299 | ||||
-rw-r--r-- | src/commit_graph.h | 67 |
2 files changed, 366 insertions, 0 deletions
diff --git a/src/commit_graph.c b/src/commit_graph.c new file mode 100644 index 000000000..71a56e3da --- /dev/null +++ b/src/commit_graph.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "commit_graph.h" + +#include "futils.h" +#include "hash.h" + +#define GIT_COMMIT_GRAPH_MISSING_PARENT 0x70000000 + +#define COMMIT_GRAPH_SIGNATURE 0x43475048 /* "CGPH" */ +#define COMMIT_GRAPH_VERSION 1 +#define COMMIT_GRAPH_OBJECT_ID_VERSION 1 +struct git_commit_graph_header { + uint32_t signature; + uint8_t version; + uint8_t object_id_version; + uint8_t chunks; + uint8_t base_graph_files; +}; + +#define COMMIT_GRAPH_OID_FANOUT_ID 0x4f494446 /* "OIDF" */ +#define COMMIT_GRAPH_OID_LOOKUP_ID 0x4f49444c /* "OIDL" */ +#define COMMIT_GRAPH_COMMIT_DATA_ID 0x43444154 /* "CDAT" */ +#define COMMIT_GRAPH_EXTRA_EDGE_LIST_ID 0x45444745 /* "EDGE" */ +#define COMMIT_GRAPH_BLOOM_FILTER_INDEX_ID 0x42494458 /* "BIDX" */ +#define COMMIT_GRAPH_BLOOM_FILTER_DATA_ID 0x42444154 /* "BDAT" */ + +struct git_commit_graph_chunk { + off64_t offset; + size_t length; +}; + +static int commit_graph_error(const char *message) +{ + git_error_set(GIT_ERROR_ODB, "invalid commit-graph file - %s", message); + return -1; +} + +static int commit_graph_parse_oid_fanout( + git_commit_graph_file *cgraph, + const unsigned char *data, + struct git_commit_graph_chunk *chunk_oid_fanout) +{ + uint32_t i, nr; + if (chunk_oid_fanout->offset == 0) + return commit_graph_error("missing OID Fanout chunk"); + if (chunk_oid_fanout->length == 0) + return commit_graph_error("empty OID Fanout chunk"); + if (chunk_oid_fanout->length != 256 * 4) + return commit_graph_error("OID Fanout chunk has wrong length"); + + cgraph->oid_fanout = (const uint32_t *)(data + chunk_oid_fanout->offset); + nr = 0; + for (i = 0; i < 256; ++i) { + uint32_t n = ntohl(cgraph->oid_fanout[i]); + if (n < nr) + return commit_graph_error("index is non-monotonic"); + nr = n; + } + cgraph->num_commits = nr; + return 0; +} + +static int commit_graph_parse_oid_lookup( + git_commit_graph_file *cgraph, + const unsigned char *data, + struct git_commit_graph_chunk *chunk_oid_lookup) +{ + uint32_t i; + git_oid *oid, *prev_oid, zero_oid = {{0}}; + + if (chunk_oid_lookup->offset == 0) + return commit_graph_error("missing OID Lookup chunk"); + if (chunk_oid_lookup->length == 0) + return commit_graph_error("empty OID Lookup chunk"); + if (chunk_oid_lookup->length != cgraph->num_commits * GIT_OID_RAWSZ) + return commit_graph_error("OID Lookup chunk has wrong length"); + + cgraph->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset); + prev_oid = &zero_oid; + for (i = 0; i < cgraph->num_commits; ++i, ++oid) { + if (git_oid_cmp(prev_oid, oid) >= 0) + return commit_graph_error("OID Lookup index is non-monotonic"); + prev_oid = oid; + } + + return 0; +} + +static int commit_graph_parse_commit_data( + git_commit_graph_file *cgraph, + const unsigned char *data, + struct git_commit_graph_chunk *chunk_commit_data) +{ + if (chunk_commit_data->offset == 0) + return commit_graph_error("missing Commit Data chunk"); + if (chunk_commit_data->length == 0) + return commit_graph_error("empty Commit Data chunk"); + if (chunk_commit_data->length != cgraph->num_commits * (GIT_OID_RAWSZ + 16)) + return commit_graph_error("Commit Data chunk has wrong length"); + + cgraph->commit_data = data + chunk_commit_data->offset; + + return 0; +} + +static int commit_graph_parse_extra_edge_list( + git_commit_graph_file *cgraph, + const unsigned char *data, + struct git_commit_graph_chunk *chunk_extra_edge_list) +{ + if (chunk_extra_edge_list->length == 0) + return 0; + if (chunk_extra_edge_list->length % 4 != 0) + return commit_graph_error("malformed Extra Edge List chunk"); + + cgraph->extra_edge_list = data + chunk_extra_edge_list->offset; + cgraph->num_extra_edge_list = chunk_extra_edge_list->length / 4; + + return 0; +} + +int git_commit_graph_parse(git_commit_graph_file *cgraph, const unsigned char *data, size_t size) +{ + struct git_commit_graph_header *hdr; + const unsigned char *chunk_hdr; + struct git_commit_graph_chunk *last_chunk; + uint32_t i; + off64_t last_chunk_offset, chunk_offset, trailer_offset; + git_oid cgraph_checksum = {{0}}; + int error; + struct git_commit_graph_chunk chunk_oid_fanout = {0}, chunk_oid_lookup = {0}, + chunk_commit_data = {0}, chunk_extra_edge_list = {0}, + chunk_unsupported = {0}; + + GIT_ASSERT_ARG(cgraph); + + if (size < sizeof(struct git_commit_graph_header) + GIT_OID_RAWSZ) + return commit_graph_error("commit-graph is too short"); + + hdr = ((struct git_commit_graph_header *)data); + + if (hdr->signature != htonl(COMMIT_GRAPH_SIGNATURE) || hdr->version != COMMIT_GRAPH_VERSION + || hdr->object_id_version != COMMIT_GRAPH_OBJECT_ID_VERSION) { + return commit_graph_error("unsupported commit-graph version"); + } + if (hdr->chunks == 0) + return commit_graph_error("no chunks in commit-graph"); + + /* + * The very first chunk's offset should be after the header, all the chunk + * headers, and a special zero chunk. + */ + last_chunk_offset = sizeof(struct git_commit_graph_header) + (1 + hdr->chunks) * 12; + trailer_offset = size - GIT_OID_RAWSZ; + if (trailer_offset < last_chunk_offset) + return commit_graph_error("wrong commit-graph size"); + git_oid_cpy(&cgraph->checksum, (git_oid *)(data + trailer_offset)); + + if (git_hash_buf(&cgraph_checksum, data, (size_t)trailer_offset) < 0) + return commit_graph_error("could not calculate signature"); + if (!git_oid_equal(&cgraph_checksum, &cgraph->checksum)) + return commit_graph_error("index signature mismatch"); + + chunk_hdr = data + sizeof(struct git_commit_graph_header); + last_chunk = NULL; + for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) { + chunk_offset = ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 4)))) << 32 + | ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 8)))); + if (chunk_offset < last_chunk_offset) + return commit_graph_error("chunks are non-monotonic"); + if (chunk_offset >= trailer_offset) + return commit_graph_error("chunks extend beyond the trailer"); + if (last_chunk != NULL) + last_chunk->length = (size_t)(chunk_offset - last_chunk_offset); + last_chunk_offset = chunk_offset; + + switch (ntohl(*((uint32_t *)(chunk_hdr + 0)))) { + case COMMIT_GRAPH_OID_FANOUT_ID: + chunk_oid_fanout.offset = last_chunk_offset; + last_chunk = &chunk_oid_fanout; + break; + + case COMMIT_GRAPH_OID_LOOKUP_ID: + chunk_oid_lookup.offset = last_chunk_offset; + last_chunk = &chunk_oid_lookup; + break; + + case COMMIT_GRAPH_COMMIT_DATA_ID: + chunk_commit_data.offset = last_chunk_offset; + last_chunk = &chunk_commit_data; + break; + + case COMMIT_GRAPH_EXTRA_EDGE_LIST_ID: + chunk_extra_edge_list.offset = last_chunk_offset; + last_chunk = &chunk_extra_edge_list; + break; + + case COMMIT_GRAPH_BLOOM_FILTER_INDEX_ID: + case COMMIT_GRAPH_BLOOM_FILTER_DATA_ID: + chunk_unsupported.offset = last_chunk_offset; + last_chunk = &chunk_unsupported; + break; + + default: + return commit_graph_error("unrecognized chunk ID"); + } + } + last_chunk->length = (size_t)(trailer_offset - last_chunk_offset); + + error = commit_graph_parse_oid_fanout(cgraph, data, &chunk_oid_fanout); + if (error < 0) + return error; + error = commit_graph_parse_oid_lookup(cgraph, data, &chunk_oid_lookup); + if (error < 0) + return error; + error = commit_graph_parse_commit_data(cgraph, data, &chunk_commit_data); + if (error < 0) + return error; + error = commit_graph_parse_extra_edge_list(cgraph, data, &chunk_extra_edge_list); + if (error < 0) + return error; + + return 0; +} + +int git_commit_graph_open(git_commit_graph_file **cgraph_out, const char *path) +{ + git_commit_graph_file *cgraph; + git_file fd = -1; + size_t cgraph_size; + struct stat st; + int error; + + /* TODO: properly open the file without access time using O_NOATIME */ + fd = git_futils_open_ro(path); + if (fd < 0) + return fd; + + if (p_fstat(fd, &st) < 0) { + p_close(fd); + git_error_set(GIT_ERROR_ODB, "multi-pack-index file not found - '%s'", path); + return -1; + } + + if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size)) { + p_close(fd); + git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path); + return -1; + } + cgraph_size = (size_t)st.st_size; + + cgraph = git__calloc(1, sizeof(git_commit_graph_file)); + GIT_ERROR_CHECK_ALLOC(cgraph); + + error = git_buf_sets(&cgraph->filename, path); + if (error < 0) + return error; + + error = git_futils_mmap_ro(&cgraph->graph_map, fd, 0, cgraph_size); + p_close(fd); + if (error < 0) { + git_commit_graph_free(cgraph); + return error; + } + + if ((error = git_commit_graph_parse(cgraph, cgraph->graph_map.data, cgraph_size)) < 0) { + git_commit_graph_free(cgraph); + return error; + } + + *cgraph_out = cgraph; + return 0; +} + +int git_commit_graph_close(git_commit_graph_file *cgraph) +{ + GIT_ASSERT_ARG(cgraph); + + if (cgraph->graph_map.data) + git_futils_mmap_free(&cgraph->graph_map); + + return 0; +} + +void git_commit_graph_free(git_commit_graph_file *cgraph) +{ + if (!cgraph) + return; + + git_buf_dispose(&cgraph->filename); + git_commit_graph_close(cgraph); + git__free(cgraph); +} diff --git a/src/commit_graph.h b/src/commit_graph.h new file mode 100644 index 000000000..01512d76f --- /dev/null +++ b/src/commit_graph.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#ifndef INCLUDE_commit_graph_h__ +#define INCLUDE_commit_graph_h__ + +#include "common.h" + +#include "map.h" + +/** + * A commit-graph file. + * + * This file contains metadata about commits, particularly the generation + * number for each one. This can help speed up graph operations without + * requiring a full graph traversal. + * + * Support for this feature was added in git 2.19. + */ +typedef struct git_commit_graph_file { + git_map graph_map; + + /* The OID Fanout table. */ + const uint32_t *oid_fanout; + /* The total number of commits in the graph. */ + uint32_t num_commits; + + /* The OID Lookup table. */ + git_oid *oid_lookup; + + /* + * The Commit Data table. Each entry contains the OID of the commit followed + * by two 8-byte fields in network byte order: + * - The indices of the first two parents (32 bits each). + * - The generation number (first 30 bits) and commit time in seconds since + * UNIX epoch (34 bits). + */ + const unsigned char *commit_data; + + /* + * The Extra Edge List table. Each 4-byte entry is a network byte order index + * of one of the i-th (i > 0) parents of commits in the `commit_data` table, + * when the commit has more than 2 parents. + */ + const unsigned char *extra_edge_list; + /* The number of entries in the Extra Edge List table. Each entry is 4 bytes wide. */ + size_t num_extra_edge_list; + + /* The trailer of the file. Contains the SHA1-checksum of the whole file. */ + git_oid checksum; + + /* something like ".git/objects/info/commit-graph". */ + git_buf filename; +} git_commit_graph_file; + +int git_commit_graph_open(git_commit_graph_file **cgraph_out, const char *path); +int git_commit_graph_close(git_commit_graph_file *cgraph); +void git_commit_graph_free(git_commit_graph_file *cgraph); + +/* This is exposed for use in the fuzzers. */ +int git_commit_graph_parse(git_commit_graph_file *cgraph, const unsigned char *data, size_t size); + +#endif |