From 4f027355f6b6b5b2ba69e23ff50cb7313d13dd70 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:08 -0600 Subject: builtin/commit-graph.c: support for '--split[=]' With '--split', the commit-graph machinery writes new commits in another incremental commit-graph which is part of the existing chain, and optionally decides to condense the chain into a single commit-graph. This is done to ensure that the asymptotic behavior of looking up a commit in an incremental chain is not dominated by the number of incrementals in that chain. It can be controlled by the '--max-commits' and '--size-multiple' options. In the next two commits, we will introduce additional splitting strategies that can exert additional control over: - when a split commit-graph is and isn't written, and - when the existing commit-graph chain is discarded completely and replaced with another graph To prepare for this, make '--split' take an optional strategy (as in '--split[=]'), and add a new enum to describe which strategy is being used. For now, no strategies are given, and the only enumerated value is 'COMMIT_GRAPH_SPLIT_UNSPECIFIED', indicating the absence of a strategy. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 4a70b33fb5..345fd97c61 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -9,7 +9,9 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " + "[--[no-]progress] "), NULL }; @@ -19,7 +21,9 @@ static const char * const builtin_commit_graph_verify_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " + "[--[no-]progress] "), NULL }; @@ -111,6 +115,18 @@ static int graph_verify(int argc, const char **argv) extern int read_replace_refs; static struct split_commit_graph_opts split_opts; +static int write_option_parse_split(const struct option *opt, const char *arg, + int unset) +{ + opts.split = 1; + if (!arg) + return 0; + + die(_("unrecognized --split argument, %s"), arg); + + return 0; +} + static int graph_write(int argc, const char **argv) { struct string_list *pack_indexes = NULL; @@ -133,8 +149,10 @@ static int graph_write(int argc, const char **argv) OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")), - OPT_BOOL(0, "split", &opts.split, - N_("allow writing an incremental commit-graph file")), + OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL, + N_("allow writing an incremental commit-graph file"), + PARSE_OPT_OPTARG | PARSE_OPT_NONEG, + write_option_parse_split), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, -- cgit v1.2.1 From fdbde82fe523374b3c5d1f0f01f3c43dcaca9465 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:12 -0600 Subject: builtin/commit-graph.c: introduce split strategy 'no-merge' In the previous commit, we laid the groundwork for supporting different splitting strategies. In this commit, we introduce the first splitting strategy: 'no-merge'. Passing '--split=no-merge' is useful for callers which wish to write a new incremental commit-graph, but do not want to spend effort condensing the incremental chain [1]. Previously, this was possible by passing '--size-multiple=0', but this no longer the case following 63020f175f (commit-graph: prefer default size_mult when given zero, 2020-01-02). When '--split=no-merge' is given, the commit-graph machinery will never condense an existing chain, and it will always write a new incremental. [1]: This might occur when, for example, a server administrator running some program after each push may want to ensure that each job runs proportional in time to the size of the push, and does not "jump" when the commit-graph machinery decides to trigger a merge. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 345fd97c61..9234b95ecf 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -118,11 +118,16 @@ static struct split_commit_graph_opts split_opts; static int write_option_parse_split(const struct option *opt, const char *arg, int unset) { + enum commit_graph_split_flags *flags = opt->value; + opts.split = 1; if (!arg) return 0; - die(_("unrecognized --split argument, %s"), arg); + if (!strcmp(arg, "no-merge")) + *flags = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED; + else + die(_("unrecognized --split argument, %s"), arg); return 0; } -- cgit v1.2.1 From 8a6ac287b2ba5f75bb2d9409dd97e9b501daf253 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:17 -0600 Subject: builtin/commit-graph.c: introduce split strategy 'replace' When using split commit-graphs, it is sometimes useful to completely replace the commit-graph chain with a new base. For example, consider a scenario in which a repository builds a new commit-graph incremental for each push. Occasionally (say, after some fixed number of pushes), they may wish to rebuild the commit-graph chain with all reachable commits. They can do so with $ git commit-graph write --reachable but this removes the chain entirely and replaces it with a single commit-graph in 'objects/info/commit-graph'. Unfortunately, this means that the next push will have to move this commit-graph into the first layer of a new chain, and then write its new commits on top. Avoid such copying entirely by allowing the caller to specify that they wish to replace the entirety of their commit-graph chain, while also specifying that the new commit-graph should become the basis of a fresh, length-one chain. This addresses the above situation by making it possible for the caller to instead write: $ git commit-graph write --reachable --split=replace which writes a new length-one chain to 'objects/info/commit-graphs', making the commit-graph incremental generated by the subsequent push relatively cheap by avoiding the aforementioned copy. In order to do this, remove an assumption in 'write_commit_graph_file' that chains are always at least two incrementals long. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 9234b95ecf..e0dd2876a1 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -126,6 +126,8 @@ static int write_option_parse_split(const struct option *opt, const char *arg, if (!strcmp(arg, "no-merge")) *flags = COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED; + else if (!strcmp(arg, "replace")) + *flags = COMMIT_GRAPH_SPLIT_REPLACE; else die(_("unrecognized --split argument, %s"), arg); -- cgit v1.2.1 From 6830c360777468434184f60023e2562348c9dacc Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 13 Apr 2020 22:04:25 -0600 Subject: commit-graph.h: replace 'commit_hex' with 'commits' The 'write_commit_graph()' function takes in either a string list of pack indices, or a string list of hexadecimal commit OIDs. These correspond to the '--stdin-packs' and '--stdin-commits' mode(s) from 'git commit-graph write'. Using a string_list of hexadecimal commit IDs is not the most efficient use of memory, since we can instead use the 'struct oidset', which is more well-suited for this case. This has another benefit which will become apparent in the following commit. This is that we are about to disambiguate the kinds of errors we produce with '--stdin-commits' into "non-hex input" and "hex-input, but referring to a non-commit object". By having 'write_commit_graph' take in a 'struct oidset *' of commits, we place the burden on the caller (in this case, the builtin) to handle the first case, and the commit-graph machinery can handle the second case. Suggested-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index e0dd2876a1..075f8f6928 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -137,7 +137,7 @@ static int write_option_parse_split(const struct option *opt, const char *arg, static int graph_write(int argc, const char **argv) { struct string_list *pack_indexes = NULL; - struct string_list *commit_hex = NULL; + struct oidset commits = OIDSET_INIT; struct object_directory *odb = NULL; struct string_list lines; int result = 0; @@ -210,7 +210,20 @@ static int graph_write(int argc, const char **argv) if (opts.stdin_packs) pack_indexes = &lines; if (opts.stdin_commits) { - commit_hex = &lines; + struct string_list_item *item; + oidset_init(&commits, lines.nr); + for_each_string_list_item(item, &lines) { + struct object_id oid; + const char *end; + + if (parse_oid_hex(item->string, &oid, &end)) { + error(_("unexpected non-hex object ID: " + "%s"), item->string); + return 1; + } + + oidset_insert(&commits, &oid); + } flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } @@ -219,7 +232,7 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(odb, pack_indexes, - commit_hex, + opts.stdin_commits ? &commits : NULL, flags, &split_opts)) result = 1; -- cgit v1.2.1 From 7a9ce0269bc0f4ef230f930b3910b70ac3142552 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Apr 2020 22:31:37 -0600 Subject: commit-graph.c: introduce '--[no-]check-oids' When operating on a stream of commit OIDs on stdin, 'git commit-graph write' checks that each OID refers to an object that is indeed a commit. This is convenient to make sure that the given input is well-formed, but can sometimes be undesirable. For example, server operators may wish to feed the refnames that were updated during a push to 'git commit-graph write --input=stdin-commits', and silently discard refs that don't point at commits. This can be done by combing the output of 'git for-each-ref' with '--format %(*objecttype)', but this requires opening up a potentially large number of objects. Instead, it is more convenient to feed the updated refs to the commit-graph machinery, and let it throw out refs that don't point to commits. Introduce '--[no-]check-oids' to make such a behavior possible. With '--check-oids' (the default behavior to retain backwards compatibility), 'git commit-graph write' will barf on a non-commit line in its input. With 'no-check-oids', such lines will be silently ignored, making the above possible by specifying this option. No matter which is supplied, 'git commit-graph write' retains the behavior from the previous commit of rejecting non-OID inputs like "HEAD" and "refs/heads/foo" as before. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 075f8f6928..2857153008 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), + "[--[no-]progress] [--[no-]check-oids] "), NULL }; @@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = { static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), + "[--[no-]progress] [--[no-]check-oids] "), NULL }; @@ -36,6 +36,7 @@ static struct opts_commit_graph { int split; int shallow; int progress; + int check_oids; } opts; static struct object_directory *find_odb(struct repository *r, @@ -160,6 +161,8 @@ static int graph_write(int argc, const char **argv) N_("allow writing an incremental commit-graph file"), PARSE_OPT_OPTARG | PARSE_OPT_NONEG, write_option_parse_split), + OPT_BOOL(0, "check-oids", &opts.check_oids, + N_("require OIDs to be commits")), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, @@ -170,6 +173,7 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); + opts.check_oids = 1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -224,7 +228,8 @@ static int graph_write(int argc, const char **argv) oidset_insert(&commits, &oid); } - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + if (opts.check_oids) + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } UNLEAK(buf); -- cgit v1.2.1 From dbd5e0a1861c5bb1446e5518173aa1760c6617b0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 29 Apr 2020 12:44:40 -0700 Subject: Revert "commit-graph.c: introduce '--[no-]check-oids'" This reverts commit 7a9ce0269bc0f4ef230f930b3910b70ac3142552, which has not yet gained consensus. --- builtin/commit-graph.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'builtin/commit-graph.c') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 2857153008..075f8f6928 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -11,7 +11,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] [--[no-]check-oids] "), + "[--[no-]progress] "), NULL }; @@ -23,7 +23,7 @@ static const char * const builtin_commit_graph_verify_usage[] = { static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] [--[no-]check-oids] "), + "[--[no-]progress] "), NULL }; @@ -36,7 +36,6 @@ static struct opts_commit_graph { int split; int shallow; int progress; - int check_oids; } opts; static struct object_directory *find_odb(struct repository *r, @@ -161,8 +160,6 @@ static int graph_write(int argc, const char **argv) N_("allow writing an incremental commit-graph file"), PARSE_OPT_OPTARG | PARSE_OPT_NONEG, write_option_parse_split), - OPT_BOOL(0, "check-oids", &opts.check_oids, - N_("require OIDs to be commits")), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, @@ -173,7 +170,6 @@ static int graph_write(int argc, const char **argv) }; opts.progress = isatty(2); - opts.check_oids = 1; split_opts.size_multiple = 2; split_opts.max_commits = 0; split_opts.expire_time = 0; @@ -228,8 +224,7 @@ static int graph_write(int argc, const char **argv) oidset_insert(&commits, &oid); } - if (opts.check_oids) - flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; + flags |= COMMIT_GRAPH_WRITE_CHECK_OIDS; } UNLEAK(buf); -- cgit v1.2.1