diff options
-rw-r--r-- | builtin/grep.c | 64 | ||||
-rw-r--r-- | config.c | 20 | ||||
-rw-r--r-- | config.h | 3 | ||||
-rw-r--r-- | grep.c | 51 | ||||
-rw-r--r-- | grep.h | 22 | ||||
-rw-r--r-- | object-file.c | 5 | ||||
-rw-r--r-- | submodule-config.c | 5 | ||||
-rw-r--r-- | submodule.c | 25 | ||||
-rw-r--r-- | submodule.h | 8 | ||||
-rw-r--r-- | t/README | 10 | ||||
-rwxr-xr-x | t/t7814-grep-recurse-submodules.sh | 3 |
11 files changed, 161 insertions, 55 deletions
diff --git a/builtin/grep.c b/builtin/grep.c index 7d2f8e5adb..51278b01fa 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -65,6 +65,9 @@ static int todo_done; /* Has all work items been added? */ static int all_work_added; +static struct repository **repos_to_free; +static size_t repos_to_free_nr, repos_to_free_alloc; + /* This lock protects all the variables above. */ static pthread_mutex_t grep_mutex; @@ -168,6 +171,19 @@ static void work_done(struct work_item *w) grep_unlock(); } +static void free_repos(void) +{ + int i; + + for (i = 0; i < repos_to_free_nr; i++) { + repo_clear(repos_to_free[i]); + free(repos_to_free[i]); + } + FREE_AND_NULL(repos_to_free); + repos_to_free_nr = 0; + repos_to_free_alloc = 0; +} + static void *run(void *arg) { int hit = 0; @@ -333,7 +349,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, struct grep_source gs; grep_source_name(opt, filename, tree_name_len, &pathbuf); - grep_source_init(&gs, GREP_SOURCE_OID, pathbuf.buf, path, oid); + grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo); strbuf_release(&pathbuf); if (num_threads > 1) { @@ -359,7 +375,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) struct grep_source gs; grep_source_name(opt, filename, 0, &buf); - grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename); + grep_source_init_file(&gs, buf.buf, filename); strbuf_release(&buf); if (num_threads > 1) { @@ -415,19 +431,24 @@ static int grep_submodule(struct grep_opt *opt, const struct object_id *oid, const char *filename, const char *path, int cached) { - struct repository subrepo; + struct repository *subrepo; struct repository *superproject = opt->repo; const struct submodule *sub; struct grep_opt subopt; - int hit; + int hit = 0; sub = submodule_from_path(superproject, null_oid(), path); if (!is_submodule_active(superproject, path)) return 0; - if (repo_submodule_init(&subrepo, superproject, sub)) + subrepo = xmalloc(sizeof(*subrepo)); + if (repo_submodule_init(subrepo, superproject, sub)) { + free(subrepo); return 0; + } + ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc); + repos_to_free[repos_to_free_nr++] = subrepo; /* * NEEDSWORK: repo_read_gitmodules() might call @@ -438,53 +459,49 @@ static int grep_submodule(struct grep_opt *opt, * subrepo's odbs to the in-memory alternates list. */ obj_read_lock(); - repo_read_gitmodules(&subrepo, 0); + repo_read_gitmodules(subrepo, 0); /* - * NEEDSWORK: This adds the submodule's object directory to the list of - * alternates for the single in-memory object store. This has some bad - * consequences for memory (processed objects will never be freed) and - * performance (this increases the number of pack files git has to pay - * attention to, to the sum of the number of pack files in all the - * repositories processed so far). This can be removed once the object - * store is no longer global and instead is a member of the repository - * object. + * All code paths tested by test code no longer need submodule ODBs to + * be added as alternates, but add it to the list just in case. + * Submodule ODBs added through add_submodule_odb_by_path() will be + * lazily registered as alternates when needed (and except in an + * unexpected code interaction, it won't be needed). */ - add_to_alternates_memory(subrepo.objects->odb->path); + add_submodule_odb_by_path(subrepo->objects->odb->path); obj_read_unlock(); memcpy(&subopt, opt, sizeof(subopt)); - subopt.repo = &subrepo; + subopt.repo = subrepo; if (oid) { - struct object *object; + enum object_type object_type; struct tree_desc tree; void *data; unsigned long size; struct strbuf base = STRBUF_INIT; obj_read_lock(); - object = parse_object_or_die(oid, NULL); + object_type = oid_object_info(subrepo, oid, NULL); obj_read_unlock(); - data = read_object_with_reference(&subrepo, - &object->oid, tree_type, + data = read_object_with_reference(subrepo, + oid, tree_type, &size, NULL); if (!data) - die(_("unable to read tree (%s)"), oid_to_hex(&object->oid)); + die(_("unable to read tree (%s)"), oid_to_hex(oid)); strbuf_addstr(&base, filename); strbuf_addch(&base, '/'); init_tree_desc(&tree, data, size); hit = grep_tree(&subopt, pathspec, &tree, &base, base.len, - object->type == OBJ_COMMIT); + object_type == OBJ_COMMIT); strbuf_release(&base); free(data); } else { hit = grep_cache(&subopt, pathspec, cached); } - repo_clear(&subrepo); return hit; } @@ -1182,5 +1199,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) run_pager(&opt, prefix); clear_pathspec(&pathspec); free_grep_patterns(&opt); + free_repos(); return !hit; } @@ -1796,6 +1796,7 @@ int git_config_from_mem(config_fn_t fn, int git_config_from_blob_oid(config_fn_t fn, const char *name, + struct repository *repo, const struct object_id *oid, void *data) { @@ -1804,7 +1805,7 @@ int git_config_from_blob_oid(config_fn_t fn, unsigned long size; int ret; - buf = read_object_file(oid, &type, &size); + buf = repo_read_object_file(repo, oid, &type, &size); if (!buf) return error(_("unable to load config blob object '%s'"), name); if (type != OBJ_BLOB) { @@ -1820,14 +1821,15 @@ int git_config_from_blob_oid(config_fn_t fn, } static int git_config_from_blob_ref(config_fn_t fn, + struct repository *repo, const char *name, void *data) { struct object_id oid; - if (get_oid(name, &oid) < 0) + if (repo_get_oid(repo, name, &oid) < 0) return error(_("unable to resolve config blob '%s'"), name); - return git_config_from_blob_oid(fn, name, &oid, data); + return git_config_from_blob_oid(fn, name, repo, &oid, data); } char *git_system_config(void) @@ -1958,12 +1960,16 @@ int config_with_options(config_fn_t fn, void *data, * If we have a specific filename, use it. Otherwise, follow the * regular lookup sequence. */ - if (config_source && config_source->use_stdin) + if (config_source && config_source->use_stdin) { return git_config_from_stdin(fn, data); - else if (config_source && config_source->file) + } else if (config_source && config_source->file) { return git_config_from_file(fn, config_source->file, data); - else if (config_source && config_source->blob) - return git_config_from_blob_ref(fn, config_source->blob, data); + } else if (config_source && config_source->blob) { + struct repository *repo = config_source->repo ? + config_source->repo : the_repository; + return git_config_from_blob_ref(fn, repo, config_source->blob, + data); + } return do_git_config_sequence(opts, fn, data); } @@ -49,6 +49,8 @@ const char *config_scope_name(enum config_scope scope); struct git_config_source { unsigned int use_stdin:1; const char *file; + /* The repository if blob is not NULL; leave blank for the_repository */ + struct repository *repo; const char *blob; enum config_scope scope; }; @@ -136,6 +138,7 @@ int git_config_from_mem(config_fn_t fn, const char *buf, size_t len, void *data, const struct config_options *opts); int git_config_from_blob_oid(config_fn_t fn, const char *name, + struct repository *repo, const struct object_id *oid, void *data); void git_config_push_parameter(const char *text); void git_config_push_env(const char *spec); @@ -1825,14 +1825,24 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs) return grep_source_1(opt, gs, 0); } +static void grep_source_init_buf(struct grep_source *gs, char *buf, + unsigned long size) +{ + gs->type = GREP_SOURCE_BUF; + gs->name = NULL; + gs->path = NULL; + gs->buf = buf; + gs->size = size; + gs->driver = NULL; + gs->identifier = NULL; +} + int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) { struct grep_source gs; int r; - grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL); - gs.buf = buf; - gs.size = size; + grep_source_init_buf(&gs, buf, size); r = grep_source(opt, &gs); @@ -1840,28 +1850,30 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) return r; } -void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const char *path, - const void *identifier) +void grep_source_init_file(struct grep_source *gs, const char *name, + const char *path) { - gs->type = type; + gs->type = GREP_SOURCE_FILE; gs->name = xstrdup_or_null(name); gs->path = xstrdup_or_null(path); gs->buf = NULL; gs->size = 0; gs->driver = NULL; + gs->identifier = xstrdup(path); +} - switch (type) { - case GREP_SOURCE_FILE: - gs->identifier = xstrdup(identifier); - break; - case GREP_SOURCE_OID: - gs->identifier = oiddup(identifier); - break; - case GREP_SOURCE_BUF: - gs->identifier = NULL; - break; - } +void grep_source_init_oid(struct grep_source *gs, const char *name, + const char *path, const struct object_id *oid, + struct repository *repo) +{ + gs->type = GREP_SOURCE_OID; + gs->name = xstrdup_or_null(name); + gs->path = xstrdup_or_null(path); + gs->buf = NULL; + gs->size = 0; + gs->driver = NULL; + gs->identifier = oiddup(oid); + gs->repo = repo; } void grep_source_clear(struct grep_source *gs) @@ -1890,7 +1902,8 @@ static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; - gs->buf = read_object_file(gs->identifier, &type, &gs->size); + gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type, + &gs->size); if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, @@ -120,7 +120,20 @@ struct grep_opt { struct grep_pat *header_list; struct grep_pat **header_tail; struct grep_expr *pattern_expression; + + /* + * NEEDSWORK: See if we can remove this field, because the repository + * should probably be per-source. That is, grep.c functions using this + * field should probably start using "repo" in "struct grep_source" + * instead. + * + * This is potentially the cause of at least one bug - "git grep" + * ignoring the textconv attributes from submodules. See [1] for more + * information. + * [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@mail.gmail.com/ + */ struct repository *repo; + const char *prefix; int prefix_length; regex_t regexp; @@ -187,6 +200,7 @@ struct grep_source { GREP_SOURCE_BUF, } type; void *identifier; + struct repository *repo; /* if GREP_SOURCE_OID */ char *buf; unsigned long size; @@ -195,9 +209,11 @@ struct grep_source { struct userdiff_driver *driver; }; -void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const char *path, - const void *identifier); +void grep_source_init_file(struct grep_source *gs, const char *name, + const char *path); +void grep_source_init_oid(struct grep_source *gs, const char *name, + const char *path, const struct object_id *oid, + struct repository *repo); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); void grep_source_load_driver(struct grep_source *gs, diff --git a/object-file.c b/object-file.c index a4d720b4f5..c1f7ece055 100644 --- a/object-file.c +++ b/object-file.c @@ -32,6 +32,7 @@ #include "packfile.h" #include "object-store.h" #include "promisor-remote.h" +#include "submodule.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -1613,6 +1614,10 @@ static int do_oid_object_info_extended(struct repository *r, break; } + if (register_all_submodule_odb_as_alternates()) + /* We added some alternates; retry */ + continue; + /* Check if it is a missing object */ if (fetch_if_missing && repo_has_promisor_remote(r) && !already_retried && diff --git a/submodule-config.c b/submodule-config.c index 2026120fb3..f95344028b 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -649,9 +649,10 @@ static void config_from_gitmodules(config_fn_t fn, struct repository *repo, void config_source.file = file; } else if (repo_get_oid(repo, GITMODULES_INDEX, &oid) >= 0 || repo_get_oid(repo, GITMODULES_HEAD, &oid) >= 0) { + config_source.repo = repo; config_source.blob = oidstr = xstrdup(oid_to_hex(&oid)); if (repo != the_repository) - add_to_alternates_memory(repo->objects->odb->path); + add_submodule_odb_by_path(repo->objects->odb->path); } else { goto out; } @@ -702,7 +703,7 @@ void gitmodules_config_oid(const struct object_id *commit_oid) if (gitmodule_oid_from_commit(commit_oid, &oid, &rev)) { git_config_from_blob_oid(gitmodules_cb, rev.buf, - &oid, the_repository); + the_repository, &oid, the_repository); } strbuf_release(&rev); diff --git a/submodule.c b/submodule.c index 8e611fe1db..8de1aecaeb 100644 --- a/submodule.c +++ b/submodule.c @@ -165,6 +165,8 @@ void stage_updated_gitmodules(struct index_state *istate) die(_("staging updated .gitmodules failed")); } +static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP; + /* TODO: remove this function, use repo_submodule_init instead. */ int add_submodule_odb(const char *path) { @@ -178,12 +180,33 @@ int add_submodule_odb(const char *path) ret = -1; goto done; } - add_to_alternates_memory(objects_directory.buf); + string_list_insert(&added_submodule_odb_paths, + strbuf_detach(&objects_directory, NULL)); done: strbuf_release(&objects_directory); return ret; } +void add_submodule_odb_by_path(const char *path) +{ + string_list_insert(&added_submodule_odb_paths, xstrdup(path)); +} + +int register_all_submodule_odb_as_alternates(void) +{ + int i; + int ret = added_submodule_odb_paths.nr; + + for (i = 0; i < added_submodule_odb_paths.nr; i++) + add_to_alternates_memory(added_submodule_odb_paths.items[i].string); + if (ret) { + string_list_clear(&added_submodule_odb_paths, 0); + if (git_env_bool("GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB", 0)) + BUG("register_all_submodule_odb_as_alternates() called"); + } + return ret; +} + void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path) { diff --git a/submodule.h b/submodule.h index 84640c49c1..17a06cc43b 100644 --- a/submodule.h +++ b/submodule.h @@ -97,7 +97,15 @@ int submodule_uses_gitfile(const char *path); #define SUBMODULE_REMOVAL_IGNORE_IGNORED_UNTRACKED (1<<2) int bad_to_remove_submodule(const char *path, unsigned flags); +/* + * Call add_submodule_odb() to add the submodule at the given path to a list. + * When register_all_submodule_odb_as_alternates() is called, the object stores + * of all submodules in that list will be added as alternates in + * the_repository. + */ int add_submodule_odb(const char *path); +void add_submodule_odb_by_path(const char *path); +int register_all_submodule_odb_as_alternates(void); /* * Checks if there are submodule changes in a..b. If a is the null OID, @@ -452,6 +452,16 @@ GIT_TEST_CHECKOUT_WORKERS=<n> overrides the 'checkout.workers' setting to <n> and 'checkout.thresholdForParallelism' to 0, forcing the execution of the parallel-checkout code. +GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=<boolean>, when true, makes +registering submodule ODBs as alternates a fatal action. Support for +this environment variable can be removed once the migration to +explicitly providing repositories when accessing submodule objects is +complete (in which case we might want to replace this with a trace2 +call so that users can make it visible if accessing submodule objects +without an explicit repository still happens) or needs to be abandoned +for whatever reason (in which case the migrated codepaths still retain +their performance benefits). + Naming Tests ------------ diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh index 828cb3ba58..3172f5b936 100755 --- a/t/t7814-grep-recurse-submodules.sh +++ b/t/t7814-grep-recurse-submodules.sh @@ -8,6 +8,9 @@ submodules. . ./test-lib.sh +GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB=1 +export GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB + test_expect_success 'setup directory structure and submodule' ' echo "(1|2)d(3|4)" >a && mkdir b && |