From 248606ebb0906076367fcfce9574f522f818c26f Mon Sep 17 00:00:00 2001 From: lhchavez Date: Tue, 5 Jan 2021 17:20:27 -0800 Subject: commit-graph: Use the commit-graph in revwalks This change makes revwalks a bit faster by using the `commit-graph` file (if present). This is thanks to the `commit-graph` allow much faster parsing of the commit information by requiring near-zero I/O (aside from reading a few dozen bytes off of a `mmap(2)`-ed file) for each commit, instead of having to read the ODB, inflate the commit, and parse it. This is done by modifying `git_commit_list_parse()` and letting it use the ODB-owned commit-graph file. Part of: #5757 --- src/commit_list.c | 28 +++++++++++++++++++++ src/commit_list.h | 1 + src/odb.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/odb.h | 15 +++++++++-- 4 files changed, 116 insertions(+), 2 deletions(-) diff --git a/src/commit_list.c b/src/commit_list.c index 2e103ec22..dfdd5daab 100644 --- a/src/commit_list.c +++ b/src/commit_list.c @@ -124,6 +124,7 @@ static int commit_quick_parse( return -1; } + node->generation = 0; node->time = commit->committer->when.time; node->out_degree = (uint16_t) git_array_size(commit->parent_ids); node->parents = alloc_parents(walk, node, node->out_degree); @@ -143,11 +144,38 @@ static int commit_quick_parse( int git_commit_list_parse(git_revwalk *walk, git_commit_list_node *commit) { git_odb_object *obj; + git_commit_graph_file *cgraph = NULL; int error; if (commit->parsed) return 0; + /* Let's try to use the commit graph first. */ + git_odb__get_commit_graph(&cgraph, walk->odb); + if (cgraph) { + git_commit_graph_entry e; + + error = git_commit_graph_entry_find(&e, cgraph, &commit->oid, GIT_OID_RAWSZ); + if (error == 0 && git__is_uint16(e.parent_count)) { + size_t i; + commit->generation = (uint32_t)e.generation; + commit->time = e.commit_time; + commit->out_degree = (uint16_t)e.parent_count; + commit->parents = alloc_parents(walk, commit, commit->out_degree); + GIT_ERROR_CHECK_ALLOC(commit->parents); + + for (i = 0; i < commit->out_degree; ++i) { + git_commit_graph_entry parent; + error = git_commit_graph_entry_parent(&parent, cgraph, &e, i); + if (error < 0) + return error; + commit->parents[i] = git_revwalk__commit_lookup(walk, &parent.sha1); + } + commit->parsed = 1; + return 0; + } + } + if ((error = git_odb_read(&obj, walk->odb, &commit->oid)) < 0) return error; diff --git a/src/commit_list.h b/src/commit_list.h index 6a65f8a76..a32377030 100644 --- a/src/commit_list.h +++ b/src/commit_list.h @@ -26,6 +26,7 @@ typedef struct git_commit_list_node { git_oid oid; int64_t time; + uint32_t generation; unsigned int seen:1, uninteresting:1, topo_delay:1, diff --git a/src/odb.c b/src/odb.c index b2988c429..02f97915d 100644 --- a/src/odb.c +++ b/src/odb.c @@ -465,6 +465,13 @@ int git_odb_new(git_odb **out) git__free(db); return -1; } + if (git_buf_init(&db->objects_dir, 0) < 0) { + git_vector_free(&db->backends); + git_cache_dispose(&db->own_cache); + git_mutex_free(&db->lock); + git__free(db); + return -1; + } *out = db; GIT_REFCOUNT_INC(db); @@ -612,6 +619,17 @@ int git_odb__add_default_backends( git_mutex_unlock(&db->lock); #endif + if (git_mutex_lock(&db->lock) < 0) { + git_error_set(GIT_ERROR_ODB, "failed to acquire the odb lock"); + return -1; + } + if (git_buf_len(&db->objects_dir) == 0 && git_buf_sets(&db->objects_dir, objects_dir) < 0) { + git_mutex_unlock(&db->lock); + git_odb_free(db); + return -1; + } + git_mutex_unlock(&db->lock); + /* add the loose object backend */ if (git_odb_backend_loose(&loose, objects_dir, -1, db->do_fsync, 0, 0) < 0 || add_backend_internal(db, loose, GIT_LOOSE_PRIORITY, as_alternates, inode) < 0) @@ -742,6 +760,8 @@ static void odb_free(git_odb *db) if (locked) git_mutex_unlock(&db->lock); + git_buf_dispose(&db->objects_dir); + git_commit_graph_free(db->cgraph); git_vector_free(&db->backends); git_cache_dispose(&db->own_cache); git_mutex_free(&db->lock); @@ -786,6 +806,53 @@ static int odb_exists_1( return (int)found; } +int git_odb__get_commit_graph(git_commit_graph_file **out, git_odb *db) +{ + int error = 0; + + if ((error = git_mutex_lock(&db->lock)) < 0) { + git_error_set(GIT_ERROR_ODB, "failed to acquire the db lock"); + return error; + } + if (!db->cgraph_checked) { + git_buf commit_graph_path = GIT_BUF_INIT; + git_commit_graph_file *cgraph = NULL; + + /* We only check once, no matter the result. */ + db->cgraph_checked = 1; + + if (git_buf_len(&db->objects_dir) == 0) { + /* + * This odb was not opened with an objects directory + * associated. Skip opening the commit graph. + */ + goto done; + } + + if ((error = git_buf_joinpath( + &commit_graph_path, + git_buf_cstr(&db->objects_dir), + "info/commit-graph")) + < 0) { + git_buf_dispose(&commit_graph_path); + goto done; + } + /* Best effort */ + error = git_commit_graph_open(&cgraph, git_buf_cstr(&commit_graph_path)); + git_buf_dispose(&commit_graph_path); + + if (error < 0) + goto done; + + db->cgraph = cgraph; + } + +done: + *out = db->cgraph; + git_mutex_unlock(&db->lock); + return 0; +} + static int odb_freshen_1( git_odb *db, const git_oid *id, @@ -1695,6 +1762,13 @@ int git_odb_refresh(struct git_odb *db) } } } + if (db->cgraph && git_commit_graph_needs_refresh(db->cgraph, NULL)) { + /* We just free the commit graph. The next time it is requested, it will be re-loaded. */ + git_commit_graph_free(db->cgraph); + db->cgraph = NULL; + } + /* Force a lazy re-check next time it is needed. */ + db->cgraph_checked = 0; git_mutex_unlock(&db->lock); return 0; diff --git a/src/odb.h b/src/odb.h index d71985668..dcf29c2e2 100644 --- a/src/odb.h +++ b/src/odb.h @@ -13,10 +13,11 @@ #include "git2/oid.h" #include "git2/types.h" -#include "vector.h" #include "cache.h" -#include "posix.h" +#include "commit_graph.h" #include "filter.h" +#include "posix.h" +#include "vector.h" #define GIT_OBJECTS_DIR "objects/" #define GIT_OBJECT_DIR_MODE 0777 @@ -43,7 +44,10 @@ struct git_odb { git_mutex lock; /* protects backends */ git_vector backends; git_cache own_cache; + git_buf objects_dir; + git_commit_graph_file *cgraph; unsigned int do_fsync :1; + unsigned int cgraph_checked :1; }; typedef enum { @@ -127,6 +131,13 @@ int git_odb__read_header_or_object( git_odb_object **out, size_t *len_p, git_object_t *type_p, git_odb *db, const git_oid *id); +/* + * Attempt to get the ODB's commit graph. This object is still owned by the + * ODB. If the repository does not contain a commit graph, it will return zero + * and `*out` will be set to NULL. + */ +int git_odb__get_commit_graph(git_commit_graph_file **out, git_odb *odb); + /* freshen an entry in the object database */ int git_odb__freshen(git_odb *db, const git_oid *id); -- cgit v1.2.1 From 25b75cd9bc01896a2b74c748ceef7110ea1b165f Mon Sep 17 00:00:00 2001 From: lhchavez Date: Wed, 10 Mar 2021 07:06:15 -0800 Subject: commit-graph: Create `git_commit_graph` as an abstraction for the file This change does a medium-size refactor of the git_commit_graph_file and the interaction with the ODB. Now instead of the ODB owning a direct reference to the git_commit_graph_file, there will be an intermediate git_commit_graph. The main advantage of that is that now end users can explicitly set a git_commit_graph that is eagerly checked for errors, while still being able to lazily use the commit-graph in a regular ODB, if the file is present. --- fuzzers/commit_graph_fuzzer.c | 10 +- include/git2/odb.h | 15 +++ include/git2/sys/commit_graph.h | 45 ++++++++ include/git2/types.h | 3 + src/commit_graph.c | 231 +++++++++++++++++++++++++++------------- src/commit_graph.h | 57 +++++++--- src/commit_list.c | 10 +- src/odb.c | 99 +++++++---------- src/odb.h | 13 ++- tests/graph/commit_graph.c | 30 +++--- 10 files changed, 332 insertions(+), 181 deletions(-) create mode 100644 include/git2/sys/commit_graph.h diff --git a/fuzzers/commit_graph_fuzzer.c b/fuzzers/commit_graph_fuzzer.c index eb2c38258..b41816ed9 100644 --- a/fuzzers/commit_graph_fuzzer.c +++ b/fuzzers/commit_graph_fuzzer.c @@ -31,7 +31,7 @@ int LLVMFuzzerInitialize(int *argc, char ***argv) int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - git_commit_graph_file cgraph = {{0}}; + git_commit_graph_file file = {{0}}; git_commit_graph_entry e; git_buf commit_graph_buf = GIT_BUF_INIT; git_oid oid = {{0}}; @@ -62,19 +62,19 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) git_buf_attach_notowned(&commit_graph_buf, (char *)data, size); } - if (git_commit_graph_parse( - &cgraph, + if (git_commit_graph_file_parse( + &file, (const unsigned char *)git_buf_cstr(&commit_graph_buf), git_buf_len(&commit_graph_buf)) < 0) goto cleanup; /* Search for any oid, just to exercise that codepath. */ - if (git_commit_graph_entry_find(&e, &cgraph, &oid, GIT_OID_HEXSZ) < 0) + if (git_commit_graph_entry_find(&e, &file, &oid, GIT_OID_HEXSZ) < 0) goto cleanup; cleanup: - git_commit_graph_close(&cgraph); + git_commit_graph_file_close(&file); git_buf_dispose(&commit_graph_buf); return 0; } diff --git a/include/git2/odb.h b/include/git2/odb.h index c4bfa5290..dd481e950 100644 --- a/include/git2/odb.h +++ b/include/git2/odb.h @@ -544,6 +544,21 @@ GIT_EXTERN(size_t) git_odb_num_backends(git_odb *odb); */ GIT_EXTERN(int) git_odb_get_backend(git_odb_backend **out, git_odb *odb, size_t pos); +/** + * Set the git commit-graph for the ODB. + * + * After a successfull call, the ownership of the cgraph parameter will be + * transferred to libgit2, and the caller should not free it. + * + * The commit-graph can also be unset by explicitly passing NULL as the cgraph + * parameter. + * + * @param odb object database + * @param cgraph the git commit-graph + * @return 0 on success; error code otherwise + */ +GIT_EXTERN(int) git_odb_set_commit_graph(git_odb *odb, git_commit_graph *cgraph); + /** @} */ GIT_END_DECL #endif diff --git a/include/git2/sys/commit_graph.h b/include/git2/sys/commit_graph.h new file mode 100644 index 000000000..038c9b739 --- /dev/null +++ b/include/git2/sys/commit_graph.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_sys_git_commit_graph_h__ +#define INCLUDE_sys_git_commit_graph_h__ + +#include "git2/common.h" +#include "git2/types.h" + +/** + * @file git2/sys/commit_graph.h + * @brief Git commit-graph + * @defgroup git_commit_graph Git commit-graph APIs + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * Opens a `git_commit_graph` from a path to an objects directory. + * + * This finds, opens, and validates the `commit-graph` file. + * + * @param cgraph_out the `git_commit_graph` struct to initialize. + * @param objects_dir the path to a git objects directory. + * @return Zero on success; -1 on failure. + */ +GIT_EXTERN(int) git_commit_graph_open(git_commit_graph **cgraph_out, const char *objects_dir); + +/** + * Frees commit-graph data. This should only be called when memory allocated + * using `git_commit_graph_open` is not returned to libgit2 because it was not + * associated with the ODB through a successful call to + * `git_odb_set_commit_graph`. + * + * @param cgraph the commit-graph object to free. If NULL, no action is taken. + */ +GIT_EXTERN(void) git_commit_graph_free(git_commit_graph *cgraph); + +GIT_END_DECL + +#endif diff --git a/include/git2/types.h b/include/git2/types.h index ade0c7d32..562eb8e5f 100644 --- a/include/git2/types.h +++ b/include/git2/types.h @@ -102,6 +102,9 @@ typedef struct git_refdb git_refdb; /** A custom backend for refs */ typedef struct git_refdb_backend git_refdb_backend; +/** A git commit-graph */ +typedef struct git_commit_graph git_commit_graph; + /** * Representation of an existing git repository, * including all its object contents diff --git a/src/commit_graph.c b/src/commit_graph.c index b301d3d49..6715d05dc 100644 --- a/src/commit_graph.c +++ b/src/commit_graph.c @@ -43,7 +43,7 @@ static int commit_graph_error(const char *message) } static int commit_graph_parse_oid_fanout( - git_commit_graph_file *cgraph, + git_commit_graph_file *file, const unsigned char *data, struct git_commit_graph_chunk *chunk_oid_fanout) { @@ -55,20 +55,20 @@ static int commit_graph_parse_oid_fanout( if (chunk_oid_fanout->length != 256 * 4) return commit_graph_error("OID Fanout chunk has wrong length"); - cgraph->oid_fanout = (const uint32_t *)(data + chunk_oid_fanout->offset); + file->oid_fanout = (const uint32_t *)(data + chunk_oid_fanout->offset); nr = 0; for (i = 0; i < 256; ++i) { - uint32_t n = ntohl(cgraph->oid_fanout[i]); + uint32_t n = ntohl(file->oid_fanout[i]); if (n < nr) return commit_graph_error("index is non-monotonic"); nr = n; } - cgraph->num_commits = nr; + file->num_commits = nr; return 0; } static int commit_graph_parse_oid_lookup( - git_commit_graph_file *cgraph, + git_commit_graph_file *file, const unsigned char *data, struct git_commit_graph_chunk *chunk_oid_lookup) { @@ -79,12 +79,12 @@ static int commit_graph_parse_oid_lookup( return commit_graph_error("missing OID Lookup chunk"); if (chunk_oid_lookup->length == 0) return commit_graph_error("empty OID Lookup chunk"); - if (chunk_oid_lookup->length != cgraph->num_commits * GIT_OID_RAWSZ) + if (chunk_oid_lookup->length != file->num_commits * GIT_OID_RAWSZ) return commit_graph_error("OID Lookup chunk has wrong length"); - cgraph->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset); + file->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset); prev_oid = &zero_oid; - for (i = 0; i < cgraph->num_commits; ++i, ++oid) { + for (i = 0; i < file->num_commits; ++i, ++oid) { if (git_oid_cmp(prev_oid, oid) >= 0) return commit_graph_error("OID Lookup index is non-monotonic"); prev_oid = oid; @@ -94,7 +94,7 @@ static int commit_graph_parse_oid_lookup( } static int commit_graph_parse_commit_data( - git_commit_graph_file *cgraph, + git_commit_graph_file *file, const unsigned char *data, struct git_commit_graph_chunk *chunk_commit_data) { @@ -102,16 +102,16 @@ static int commit_graph_parse_commit_data( return commit_graph_error("missing Commit Data chunk"); if (chunk_commit_data->length == 0) return commit_graph_error("empty Commit Data chunk"); - if (chunk_commit_data->length != cgraph->num_commits * (GIT_OID_RAWSZ + 16)) + if (chunk_commit_data->length != file->num_commits * (GIT_OID_RAWSZ + 16)) return commit_graph_error("Commit Data chunk has wrong length"); - cgraph->commit_data = data + chunk_commit_data->offset; + file->commit_data = data + chunk_commit_data->offset; return 0; } static int commit_graph_parse_extra_edge_list( - git_commit_graph_file *cgraph, + git_commit_graph_file *file, const unsigned char *data, struct git_commit_graph_chunk *chunk_extra_edge_list) { @@ -120,13 +120,16 @@ static int commit_graph_parse_extra_edge_list( if (chunk_extra_edge_list->length % 4 != 0) return commit_graph_error("malformed Extra Edge List chunk"); - cgraph->extra_edge_list = data + chunk_extra_edge_list->offset; - cgraph->num_extra_edge_list = chunk_extra_edge_list->length / 4; + file->extra_edge_list = data + chunk_extra_edge_list->offset; + file->num_extra_edge_list = chunk_extra_edge_list->length / 4; return 0; } -int git_commit_graph_parse(git_commit_graph_file *cgraph, const unsigned char *data, size_t size) +int git_commit_graph_file_parse( + git_commit_graph_file *file, + const unsigned char *data, + size_t size) { struct git_commit_graph_header *hdr; const unsigned char *chunk_hdr; @@ -139,7 +142,7 @@ int git_commit_graph_parse(git_commit_graph_file *cgraph, const unsigned char *d chunk_commit_data = {0}, chunk_extra_edge_list = {0}, chunk_unsupported = {0}; - GIT_ASSERT_ARG(cgraph); + GIT_ASSERT_ARG(file); if (size < sizeof(struct git_commit_graph_header) + GIT_OID_RAWSZ) return commit_graph_error("commit-graph is too short"); @@ -161,11 +164,11 @@ int git_commit_graph_parse(git_commit_graph_file *cgraph, const unsigned char *d trailer_offset = size - GIT_OID_RAWSZ; if (trailer_offset < last_chunk_offset) return commit_graph_error("wrong commit-graph size"); - git_oid_cpy(&cgraph->checksum, (git_oid *)(data + trailer_offset)); + git_oid_cpy(&file->checksum, (git_oid *)(data + trailer_offset)); if (git_hash_buf(&cgraph_checksum, data, (size_t)trailer_offset) < 0) return commit_graph_error("could not calculate signature"); - if (!git_oid_equal(&cgraph_checksum, &cgraph->checksum)) + if (!git_oid_equal(&cgraph_checksum, &file->checksum)) return commit_graph_error("index signature mismatch"); chunk_hdr = data + sizeof(struct git_commit_graph_header); @@ -214,25 +217,60 @@ int git_commit_graph_parse(git_commit_graph_file *cgraph, const unsigned char *d } last_chunk->length = (size_t)(trailer_offset - last_chunk_offset); - error = commit_graph_parse_oid_fanout(cgraph, data, &chunk_oid_fanout); + error = commit_graph_parse_oid_fanout(file, data, &chunk_oid_fanout); if (error < 0) return error; - error = commit_graph_parse_oid_lookup(cgraph, data, &chunk_oid_lookup); + error = commit_graph_parse_oid_lookup(file, data, &chunk_oid_lookup); if (error < 0) return error; - error = commit_graph_parse_commit_data(cgraph, data, &chunk_commit_data); + error = commit_graph_parse_commit_data(file, data, &chunk_commit_data); if (error < 0) return error; - error = commit_graph_parse_extra_edge_list(cgraph, data, &chunk_extra_edge_list); + error = commit_graph_parse_extra_edge_list(file, data, &chunk_extra_edge_list); if (error < 0) return error; return 0; } -int git_commit_graph_open(git_commit_graph_file **cgraph_out, const char *path) +int git_commit_graph_new(git_commit_graph **cgraph_out, const char *objects_dir, bool open_file) { - git_commit_graph_file *cgraph; + git_commit_graph *cgraph = NULL; + int error = 0; + + GIT_ASSERT_ARG(cgraph_out); + GIT_ASSERT_ARG(objects_dir); + + cgraph = git__calloc(1, sizeof(git_commit_graph)); + GIT_ERROR_CHECK_ALLOC(cgraph); + + error = git_buf_joinpath(&cgraph->filename, objects_dir, "info/commit-graph"); + if (error < 0) + goto error; + + if (open_file) { + error = git_commit_graph_file_open(&cgraph->file, git_buf_cstr(&cgraph->filename)); + if (error < 0) + goto error; + cgraph->checked = 1; + } + + *cgraph_out = cgraph; + return 0; + +error: + git_commit_graph_free(cgraph); + return error; +} + +int git_commit_graph_open(git_commit_graph **cgraph_out, const char *objects_dir) +{ + return git_commit_graph_new(cgraph_out, objects_dir, true); +} + +int git_commit_graph_file_open(git_commit_graph_file **file_out, const char *path) +{ + git_commit_graph_file *file; git_file fd = -1; size_t cgraph_size; struct stat st; @@ -245,56 +283,92 @@ int git_commit_graph_open(git_commit_graph_file **cgraph_out, const char *path) if (p_fstat(fd, &st) < 0) { p_close(fd); - git_error_set(GIT_ERROR_ODB, "multi-pack-index file not found - '%s'", path); - return -1; + git_error_set(GIT_ERROR_ODB, "commit-graph file not found - '%s'", path); + return GIT_ENOTFOUND; } if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size)) { p_close(fd); git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path); - return -1; + return GIT_ENOTFOUND; } cgraph_size = (size_t)st.st_size; - cgraph = git__calloc(1, sizeof(git_commit_graph_file)); - GIT_ERROR_CHECK_ALLOC(cgraph); - - error = git_buf_sets(&cgraph->filename, path); - if (error < 0) - return error; + file = git__calloc(1, sizeof(git_commit_graph_file)); + GIT_ERROR_CHECK_ALLOC(file); - error = git_futils_mmap_ro(&cgraph->graph_map, fd, 0, cgraph_size); + error = git_futils_mmap_ro(&file->graph_map, fd, 0, cgraph_size); p_close(fd); if (error < 0) { - git_commit_graph_free(cgraph); + git_commit_graph_file_free(file); return error; } - if ((error = git_commit_graph_parse(cgraph, cgraph->graph_map.data, cgraph_size)) < 0) { - git_commit_graph_free(cgraph); + if ((error = git_commit_graph_file_parse(file, file->graph_map.data, cgraph_size)) < 0) { + git_commit_graph_file_free(file); return error; } - *cgraph_out = cgraph; + *file_out = file; return 0; } +int git_commit_graph_get_file(git_commit_graph_file **file_out, git_commit_graph *cgraph) +{ + if (!cgraph->checked) { + int error = 0; + git_commit_graph_file *result = NULL; + + /* We only check once, no matter the result. */ + cgraph->checked = 1; + + /* Best effort */ + error = git_commit_graph_file_open(&result, git_buf_cstr(&cgraph->filename)); + + if (error < 0) + return error; + + cgraph->file = result; + } + if (!cgraph->file) + return GIT_ENOTFOUND; + + *file_out = cgraph->file; + return 0; +} + +void git_commit_graph_refresh(git_commit_graph *cgraph) +{ + if (!cgraph->checked) + return; + + if (cgraph->file + && git_commit_graph_file_needs_refresh(cgraph->file, git_buf_cstr(&cgraph->filename))) { + /* We just free the commit graph. The next time it is requested, it will be + * re-loaded. */ + git_commit_graph_file_free(cgraph->file); + cgraph->file = NULL; + } + /* Force a lazy re-check next time it is needed. */ + cgraph->checked = 0; +} + static int git_commit_graph_entry_get_byindex( git_commit_graph_entry *e, - const git_commit_graph_file *cgraph, + const git_commit_graph_file *file, size_t pos) { const unsigned char *commit_data; GIT_ASSERT_ARG(e); - GIT_ASSERT_ARG(cgraph); + GIT_ASSERT_ARG(file); - if (pos >= cgraph->num_commits) { + if (pos >= file->num_commits) { git_error_set(GIT_ERROR_INVALID, "commit index %zu does not exist", pos); return GIT_ENOTFOUND; } - commit_data = cgraph->commit_data + pos * (GIT_OID_RAWSZ + 4 * sizeof(uint32_t)); + commit_data = file->commit_data + pos * (GIT_OID_RAWSZ + 4 * sizeof(uint32_t)); git_oid_cpy(&e->tree_oid, (const git_oid *)commit_data); e->parent_indices[0] = ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ))); e->parent_indices[1] @@ -310,7 +384,7 @@ static int git_commit_graph_entry_get_byindex( uint32_t extra_edge_list_pos = e->parent_indices[1] & 0x7fffffff; /* Make sure we're not being sent out of bounds */ - if (extra_edge_list_pos >= cgraph->num_extra_edge_list) { + if (extra_edge_list_pos >= file->num_extra_edge_list) { git_error_set(GIT_ERROR_INVALID, "commit %u does not exist", extra_edge_list_pos); @@ -318,9 +392,9 @@ static int git_commit_graph_entry_get_byindex( } e->extra_parents_index = extra_edge_list_pos; - while (extra_edge_list_pos < cgraph->num_extra_edge_list + while (extra_edge_list_pos < file->num_extra_edge_list && (ntohl(*( - (uint32_t *)(cgraph->extra_edge_list + (uint32_t *)(file->extra_edge_list + extra_edge_list_pos * sizeof(uint32_t)))) & 0x80000000u) == 0) { @@ -329,20 +403,17 @@ static int git_commit_graph_entry_get_byindex( } } - git_oid_cpy(&e->sha1, &cgraph->oid_lookup[pos]); + git_oid_cpy(&e->sha1, &file->oid_lookup[pos]); return 0; } -bool git_commit_graph_needs_refresh(const git_commit_graph_file *cgraph, const char *path) +bool git_commit_graph_file_needs_refresh(const git_commit_graph_file *file, const char *path) { git_file fd = -1; struct stat st; ssize_t bytes_read; git_oid cgraph_checksum = {{0}}; - if (path == NULL) - path = git_buf_cstr(&cgraph->filename); - /* TODO: properly open the file without access time using O_NOATIME */ fd = git_futils_open_ro(path); if (fd < 0) @@ -354,7 +425,7 @@ bool git_commit_graph_needs_refresh(const git_commit_graph_file *cgraph, const c } if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size) - || (size_t)st.st_size != cgraph->graph_map.len) { + || (size_t)st.st_size != file->graph_map.len) { p_close(fd); return true; } @@ -364,12 +435,12 @@ bool git_commit_graph_needs_refresh(const git_commit_graph_file *cgraph, const c if (bytes_read != GIT_OID_RAWSZ) return true; - return !git_oid_equal(&cgraph_checksum, &cgraph->checksum); + return !git_oid_equal(&cgraph_checksum, &file->checksum); } int git_commit_graph_entry_find( git_commit_graph_entry *e, - const git_commit_graph_file *cgraph, + const git_commit_graph_file *file, const git_oid *short_oid, size_t len) { @@ -378,31 +449,31 @@ int git_commit_graph_entry_find( const git_oid *current = NULL; GIT_ASSERT_ARG(e); - GIT_ASSERT_ARG(cgraph); + GIT_ASSERT_ARG(file); GIT_ASSERT_ARG(short_oid); - hi = ntohl(cgraph->oid_fanout[(int)short_oid->id[0]]); - lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(cgraph->oid_fanout[(int)short_oid->id[0] - 1])); + hi = ntohl(file->oid_fanout[(int)short_oid->id[0]]); + lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(file->oid_fanout[(int)short_oid->id[0] - 1])); - pos = git_pack__lookup_sha1(cgraph->oid_lookup, GIT_OID_RAWSZ, lo, hi, short_oid->id); + pos = git_pack__lookup_sha1(file->oid_lookup, GIT_OID_RAWSZ, lo, hi, short_oid->id); if (pos >= 0) { /* An object matching exactly the oid was found */ found = 1; - current = cgraph->oid_lookup + pos; + current = file->oid_lookup + pos; } else { /* No object was found */ /* pos refers to the object with the "closest" oid to short_oid */ pos = -1 - pos; - if (pos < (int)cgraph->num_commits) { - current = cgraph->oid_lookup + pos; + if (pos < (int)file->num_commits) { + current = file->oid_lookup + pos; if (!git_oid_ncmp(short_oid, current, len)) found = 1; } } - if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)cgraph->num_commits) { + if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)file->num_commits) { /* Check for ambiguousity */ const git_oid *next = current + 1; @@ -413,22 +484,22 @@ int git_commit_graph_entry_find( if (!found) return git_odb__error_notfound( - "failed to find offset for multi-pack index entry", short_oid, len); + "failed to find offset for commit-graph index entry", short_oid, len); if (found > 1) return git_odb__error_ambiguous( - "found multiple offsets for multi-pack index entry"); + "found multiple offsets for commit-graph index entry"); - return git_commit_graph_entry_get_byindex(e, cgraph, pos); + return git_commit_graph_entry_get_byindex(e, file, pos); } int git_commit_graph_entry_parent( git_commit_graph_entry *parent, - const git_commit_graph_file *cgraph, + const git_commit_graph_file *file, const git_commit_graph_entry *entry, size_t n) { GIT_ASSERT_ARG(parent); - GIT_ASSERT_ARG(cgraph); + GIT_ASSERT_ARG(file); if (n >= entry->parent_count) { git_error_set(GIT_ERROR_INVALID, "parent index %zu does not exist", n); @@ -436,35 +507,43 @@ int git_commit_graph_entry_parent( } if (n == 0 || (n == 1 && entry->parent_count == 2)) - return git_commit_graph_entry_get_byindex(parent, cgraph, entry->parent_indices[n]); + return git_commit_graph_entry_get_byindex(parent, file, entry->parent_indices[n]); return git_commit_graph_entry_get_byindex( parent, - cgraph, + file, ntohl( - *(uint32_t *)(cgraph->extra_edge_list + *(uint32_t *)(file->extra_edge_list + (entry->extra_parents_index + n - 1) * sizeof(uint32_t))) & 0x7fffffff); } - -int git_commit_graph_close(git_commit_graph_file *cgraph) +int git_commit_graph_file_close(git_commit_graph_file *file) { - GIT_ASSERT_ARG(cgraph); + GIT_ASSERT_ARG(file); - if (cgraph->graph_map.data) - git_futils_mmap_free(&cgraph->graph_map); + if (file->graph_map.data) + git_futils_mmap_free(&file->graph_map); return 0; } -void git_commit_graph_free(git_commit_graph_file *cgraph) +void git_commit_graph_free(git_commit_graph *cgraph) { if (!cgraph) return; git_buf_dispose(&cgraph->filename); - git_commit_graph_close(cgraph); + git_commit_graph_file_free(cgraph->file); git__free(cgraph); } + +void git_commit_graph_file_free(git_commit_graph_file *file) +{ + if (!file) + return; + + git_commit_graph_file_close(file); + git__free(file); +} diff --git a/src/commit_graph.h b/src/commit_graph.h index f21a03769..e5e3ea1fd 100644 --- a/src/commit_graph.h +++ b/src/commit_graph.h @@ -10,6 +10,9 @@ #include "common.h" +#include "git2/types.h" +#include "git2/sys/commit_graph.h" + #include "map.h" /** @@ -52,9 +55,6 @@ typedef struct git_commit_graph_file { /* The trailer of the file. Contains the SHA1-checksum of the whole file. */ git_oid checksum; - - /* something like ".git/objects/info/commit-graph". */ - git_buf filename; } git_commit_graph_file; /** @@ -88,29 +88,60 @@ typedef struct git_commit_graph_entry { git_oid sha1; } git_commit_graph_entry; -int git_commit_graph_open(git_commit_graph_file **cgraph_out, const char *path); +/* A wrapper for git_commit_graph_file to enable lazy loading in the ODB. */ +struct git_commit_graph { + /* The path to the commit-graph file. Something like ".git/objects/info/commit-graph". */ + git_buf filename; + + /* The underlying commit-graph file. */ + git_commit_graph_file *file; + + /* Whether the commit-graph file was already checked for validity. */ + bool checked; +}; + +/** Create a new commit-graph, optionally opening the underlying file. */ +int git_commit_graph_new(git_commit_graph **cgraph_out, const char *objects_dir, bool open_file); + +/** Open and validate a commit-graph file. */ +int git_commit_graph_file_open(git_commit_graph_file **file_out, const char *path); + +/* + * Attempt to get the git_commit_graph's commit-graph file. This object is + * still owned by the git_commit_graph. If the repository does not contain a commit graph, + * it will return GIT_ENOTFOUND. + * + * This function is not thread-safe. + */ +int git_commit_graph_get_file(git_commit_graph_file **file_out, git_commit_graph *cgraph); + +/* Marks the commit-graph file as needing a refresh. */ +void git_commit_graph_refresh(git_commit_graph *cgraph); /* - * Returns whether the commit_graph_file needs to be reloaded since the - * contents of the commit-graph file have changed on disk. If `path` is NULL, - * the filename stored in `cgraph` will be used. + * Returns whether the git_commit_graph_file needs to be reloaded since the + * contents of the commit-graph file have changed on disk. */ -bool git_commit_graph_needs_refresh(const git_commit_graph_file *cgraph, const char *path); +bool git_commit_graph_file_needs_refresh( + const git_commit_graph_file *file, const char *path); int git_commit_graph_entry_find( git_commit_graph_entry *e, - const git_commit_graph_file *cgraph, + const git_commit_graph_file *file, const git_oid *short_oid, size_t len); int git_commit_graph_entry_parent( git_commit_graph_entry *parent, - const git_commit_graph_file *cgraph, + const git_commit_graph_file *file, const git_commit_graph_entry *entry, size_t n); -int git_commit_graph_close(git_commit_graph_file *cgraph); -void git_commit_graph_free(git_commit_graph_file *cgraph); +int git_commit_graph_file_close(git_commit_graph_file *cgraph); +void git_commit_graph_file_free(git_commit_graph_file *cgraph); /* This is exposed for use in the fuzzers. */ -int git_commit_graph_parse(git_commit_graph_file *cgraph, const unsigned char *data, size_t size); +int git_commit_graph_file_parse( + git_commit_graph_file *file, + const unsigned char *data, + size_t size); #endif diff --git a/src/commit_list.c b/src/commit_list.c index dfdd5daab..11cc2e7d2 100644 --- a/src/commit_list.c +++ b/src/commit_list.c @@ -144,18 +144,18 @@ static int commit_quick_parse( int git_commit_list_parse(git_revwalk *walk, git_commit_list_node *commit) { git_odb_object *obj; - git_commit_graph_file *cgraph = NULL; + git_commit_graph_file *cgraph_file = NULL; int error; if (commit->parsed) return 0; /* Let's try to use the commit graph first. */ - git_odb__get_commit_graph(&cgraph, walk->odb); - if (cgraph) { + git_odb__get_commit_graph_file(&cgraph_file, walk->odb); + if (cgraph_file) { git_commit_graph_entry e; - error = git_commit_graph_entry_find(&e, cgraph, &commit->oid, GIT_OID_RAWSZ); + error = git_commit_graph_entry_find(&e, cgraph_file, &commit->oid, GIT_OID_RAWSZ); if (error == 0 && git__is_uint16(e.parent_count)) { size_t i; commit->generation = (uint32_t)e.generation; @@ -166,7 +166,7 @@ int git_commit_list_parse(git_revwalk *walk, git_commit_list_node *commit) for (i = 0; i < commit->out_degree; ++i) { git_commit_graph_entry parent; - error = git_commit_graph_entry_parent(&parent, cgraph, &e, i); + error = git_commit_graph_entry_parent(&parent, cgraph_file, &e, i); if (error < 0) return error; commit->parents[i] = git_revwalk__commit_lookup(walk, &parent.sha1); diff --git a/src/odb.c b/src/odb.c index 02f97915d..40504ca15 100644 --- a/src/odb.c +++ b/src/odb.c @@ -465,13 +465,6 @@ int git_odb_new(git_odb **out) git__free(db); return -1; } - if (git_buf_init(&db->objects_dir, 0) < 0) { - git_vector_free(&db->backends); - git_cache_dispose(&db->own_cache); - git_mutex_free(&db->lock); - git__free(db); - return -1; - } *out = db; GIT_REFCOUNT_INC(db); @@ -619,17 +612,6 @@ int git_odb__add_default_backends( git_mutex_unlock(&db->lock); #endif - if (git_mutex_lock(&db->lock) < 0) { - git_error_set(GIT_ERROR_ODB, "failed to acquire the odb lock"); - return -1; - } - if (git_buf_len(&db->objects_dir) == 0 && git_buf_sets(&db->objects_dir, objects_dir) < 0) { - git_mutex_unlock(&db->lock); - git_odb_free(db); - return -1; - } - git_mutex_unlock(&db->lock); - /* add the loose object backend */ if (git_odb_backend_loose(&loose, objects_dir, -1, db->do_fsync, 0, 0) < 0 || add_backend_internal(db, loose, GIT_LOOSE_PRIORITY, as_alternates, inode) < 0) @@ -640,6 +622,16 @@ int git_odb__add_default_backends( add_backend_internal(db, packed, GIT_PACKED_PRIORITY, as_alternates, inode) < 0) return -1; + if (git_mutex_lock(&db->lock) < 0) { + git_error_set(GIT_ERROR_ODB, "failed to acquire the odb lock"); + return -1; + } + if (!db->cgraph && git_commit_graph_new(&db->cgraph, objects_dir, false) < 0) { + git_mutex_unlock(&db->lock); + return -1; + } + git_mutex_unlock(&db->lock); + return load_alternates(db, objects_dir, alternate_depth); } @@ -701,6 +693,23 @@ int git_odb_add_disk_alternate(git_odb *odb, const char *path) return git_odb__add_default_backends(odb, path, true, 0); } +int git_odb_set_commit_graph(git_odb *odb, git_commit_graph *cgraph) +{ + int error = 0; + + GIT_ASSERT_ARG(odb); + + if ((error = git_mutex_lock(&odb->lock)) < 0) { + git_error_set(GIT_ERROR_ODB, "failed to acquire the db lock"); + return error; + } + git_commit_graph_free(odb->cgraph); + odb->cgraph = cgraph; + git_mutex_unlock(&odb->lock); + + return error; +} + int git_odb_open(git_odb **out, const char *objects_dir) { git_odb *db; @@ -760,7 +769,6 @@ static void odb_free(git_odb *db) if (locked) git_mutex_unlock(&db->lock); - git_buf_dispose(&db->objects_dir); git_commit_graph_free(db->cgraph); git_vector_free(&db->backends); git_cache_dispose(&db->own_cache); @@ -806,51 +814,27 @@ static int odb_exists_1( return (int)found; } -int git_odb__get_commit_graph(git_commit_graph_file **out, git_odb *db) +int git_odb__get_commit_graph_file(git_commit_graph_file **out, git_odb *db) { int error = 0; + git_commit_graph_file *result = NULL; if ((error = git_mutex_lock(&db->lock)) < 0) { git_error_set(GIT_ERROR_ODB, "failed to acquire the db lock"); return error; } - if (!db->cgraph_checked) { - git_buf commit_graph_path = GIT_BUF_INIT; - git_commit_graph_file *cgraph = NULL; - - /* We only check once, no matter the result. */ - db->cgraph_checked = 1; - - if (git_buf_len(&db->objects_dir) == 0) { - /* - * This odb was not opened with an objects directory - * associated. Skip opening the commit graph. - */ - goto done; - } - - if ((error = git_buf_joinpath( - &commit_graph_path, - git_buf_cstr(&db->objects_dir), - "info/commit-graph")) - < 0) { - git_buf_dispose(&commit_graph_path); - goto done; - } - /* Best effort */ - error = git_commit_graph_open(&cgraph, git_buf_cstr(&commit_graph_path)); - git_buf_dispose(&commit_graph_path); - - if (error < 0) - goto done; - - db->cgraph = cgraph; + if (!db->cgraph) { + error = GIT_ENOTFOUND; + goto done; } + error = git_commit_graph_get_file(&result, db->cgraph); + if (error) + goto done; + *out = result; done: - *out = db->cgraph; git_mutex_unlock(&db->lock); - return 0; + return error; } static int odb_freshen_1( @@ -1762,13 +1746,8 @@ int git_odb_refresh(struct git_odb *db) } } } - if (db->cgraph && git_commit_graph_needs_refresh(db->cgraph, NULL)) { - /* We just free the commit graph. The next time it is requested, it will be re-loaded. */ - git_commit_graph_free(db->cgraph); - db->cgraph = NULL; - } - /* Force a lazy re-check next time it is needed. */ - db->cgraph_checked = 0; + if (db->cgraph) + git_commit_graph_refresh(db->cgraph); git_mutex_unlock(&db->lock); return 0; diff --git a/src/odb.h b/src/odb.h index dcf29c2e2..5bebb6edc 100644 --- a/src/odb.h +++ b/src/odb.h @@ -12,6 +12,7 @@ #include "git2/odb.h" #include "git2/oid.h" #include "git2/types.h" +#include "git2/sys/commit_graph.h" #include "cache.h" #include "commit_graph.h" @@ -44,10 +45,8 @@ struct git_odb { git_mutex lock; /* protects backends */ git_vector backends; git_cache own_cache; - git_buf objects_dir; - git_commit_graph_file *cgraph; + git_commit_graph *cgraph; unsigned int do_fsync :1; - unsigned int cgraph_checked :1; }; typedef enum { @@ -132,11 +131,11 @@ int git_odb__read_header_or_object( git_odb *db, const git_oid *id); /* - * Attempt to get the ODB's commit graph. This object is still owned by the - * ODB. If the repository does not contain a commit graph, it will return zero - * and `*out` will be set to NULL. + * Attempt to get the ODB's commit-graph file. This object is still owned by + * the ODB. If the repository does not contain a commit-graph, it will return + * GIT_ENOTFOUND. */ -int git_odb__get_commit_graph(git_commit_graph_file **out, git_odb *odb); +int git_odb__get_commit_graph_file(git_commit_graph_file **out, git_odb *odb); /* freshen an entry in the object database */ int git_odb__freshen(git_odb *db, const git_oid *id); diff --git a/tests/graph/commit_graph.c b/tests/graph/commit_graph.c index 4c7c5a726..c6d84a007 100644 --- a/tests/graph/commit_graph.c +++ b/tests/graph/commit_graph.c @@ -7,18 +7,18 @@ void test_graph_commit_graph__parse(void) { git_repository *repo; - struct git_commit_graph_file *cgraph; + struct git_commit_graph_file *file; struct git_commit_graph_entry e, parent; git_oid id; git_buf commit_graph_path = GIT_BUF_INIT; cl_git_pass(git_repository_open(&repo, cl_fixture("testrepo.git"))); cl_git_pass(git_buf_joinpath(&commit_graph_path, git_repository_path(repo), "objects/info/commit-graph")); - cl_git_pass(git_commit_graph_open(&cgraph, git_buf_cstr(&commit_graph_path))); - cl_assert_equal_i(git_commit_graph_needs_refresh(cgraph, git_buf_cstr(&commit_graph_path)), 0); + cl_git_pass(git_commit_graph_file_open(&file, git_buf_cstr(&commit_graph_path))); + cl_assert_equal_i(git_commit_graph_file_needs_refresh(file, git_buf_cstr(&commit_graph_path)), 0); cl_git_pass(git_oid_fromstr(&id, "5001298e0c09ad9c34e4249bc5801c75e9754fa5")); - cl_git_pass(git_commit_graph_entry_find(&e, cgraph, &id, GIT_OID_HEXSZ)); + cl_git_pass(git_commit_graph_entry_find(&e, file, &id, GIT_OID_HEXSZ)); cl_assert_equal_oid(&e.sha1, &id); cl_git_pass(git_oid_fromstr(&id, "418382dff1ffb8bdfba833f4d8bbcde58b1e7f47")); cl_assert_equal_oid(&e.tree_oid, &id); @@ -27,23 +27,23 @@ void test_graph_commit_graph__parse(void) cl_assert_equal_i(e.parent_count, 0); cl_git_pass(git_oid_fromstr(&id, "be3563ae3f795b2b4353bcce3a527ad0a4f7f644")); - cl_git_pass(git_commit_graph_entry_find(&e, cgraph, &id, GIT_OID_HEXSZ)); + cl_git_pass(git_commit_graph_entry_find(&e, file, &id, GIT_OID_HEXSZ)); cl_assert_equal_oid(&e.sha1, &id); cl_assert_equal_i(e.generation, 5); cl_assert_equal_i(e.commit_time, 1274813907ull); cl_assert_equal_i(e.parent_count, 2); cl_git_pass(git_oid_fromstr(&id, "9fd738e8f7967c078dceed8190330fc8648ee56a")); - cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 0)); + cl_git_pass(git_commit_graph_entry_parent(&parent, file, &e, 0)); cl_assert_equal_oid(&parent.sha1, &id); cl_assert_equal_i(parent.generation, 4); cl_git_pass(git_oid_fromstr(&id, "c47800c7266a2be04c571c04d5a6614691ea99bd")); - cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 1)); + cl_git_pass(git_commit_graph_entry_parent(&parent, file, &e, 1)); cl_assert_equal_oid(&parent.sha1, &id); cl_assert_equal_i(parent.generation, 3); - git_commit_graph_free(cgraph); + git_commit_graph_file_free(file); git_repository_free(repo); git_buf_dispose(&commit_graph_path); } @@ -51,17 +51,17 @@ void test_graph_commit_graph__parse(void) void test_graph_commit_graph__parse_octopus_merge(void) { git_repository *repo; - struct git_commit_graph_file *cgraph; + struct git_commit_graph_file *file; struct git_commit_graph_entry e, parent; git_oid id; git_buf commit_graph_path = GIT_BUF_INIT; cl_git_pass(git_repository_open(&repo, cl_fixture("merge-recursive/.gitted"))); cl_git_pass(git_buf_joinpath(&commit_graph_path, git_repository_path(repo), "objects/info/commit-graph")); - cl_git_pass(git_commit_graph_open(&cgraph, git_buf_cstr(&commit_graph_path))); + cl_git_pass(git_commit_graph_file_open(&file, git_buf_cstr(&commit_graph_path))); cl_git_pass(git_oid_fromstr(&id, "d71c24b3b113fd1d1909998c5bfe33b86a65ee03")); - cl_git_pass(git_commit_graph_entry_find(&e, cgraph, &id, GIT_OID_HEXSZ)); + cl_git_pass(git_commit_graph_entry_find(&e, file, &id, GIT_OID_HEXSZ)); cl_assert_equal_oid(&e.sha1, &id); cl_git_pass(git_oid_fromstr(&id, "348f16ffaeb73f319a75cec5b16a0a47d2d5e27c")); cl_assert_equal_oid(&e.tree_oid, &id); @@ -70,21 +70,21 @@ void test_graph_commit_graph__parse_octopus_merge(void) cl_assert_equal_i(e.parent_count, 3); cl_git_pass(git_oid_fromstr(&id, "ad2ace9e15f66b3d1138922e6ffdc3ea3f967fa6")); - cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 0)); + cl_git_pass(git_commit_graph_entry_parent(&parent, file, &e, 0)); cl_assert_equal_oid(&parent.sha1, &id); cl_assert_equal_i(parent.generation, 6); cl_git_pass(git_oid_fromstr(&id, "483065df53c0f4a02cdc6b2910b05d388fc17ffb")); - cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 1)); + cl_git_pass(git_commit_graph_entry_parent(&parent, file, &e, 1)); cl_assert_equal_oid(&parent.sha1, &id); cl_assert_equal_i(parent.generation, 2); cl_git_pass(git_oid_fromstr(&id, "815b5a1c80ca749d705c7aa0cb294a00cbedd340")); - cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 2)); + cl_git_pass(git_commit_graph_entry_parent(&parent, file, &e, 2)); cl_assert_equal_oid(&parent.sha1, &id); cl_assert_equal_i(parent.generation, 6); - git_commit_graph_free(cgraph); + git_commit_graph_file_free(file); git_repository_free(repo); git_buf_dispose(&commit_graph_path); } -- cgit v1.2.1