diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2020-07-12 21:26:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-12 21:26:59 +0100 |
commit | a83fd5107879d18b31ff8173ea062136256321be (patch) | |
tree | 665e1e0b48516558ee88fc28feb82dbb0758e42b | |
parent | 26b9e489c103ad6768708feb23d9844e202766fb (diff) | |
parent | 92d42eb3d83a28febbbb50df7c398e32677da28a (diff) | |
download | libgit2-a83fd5107879d18b31ff8173ea062136256321be.tar.gz |
Merge pull request #5396 from lhchavez/mwindow-file-limit
mwindow: set limit on number of open files
-rw-r--r-- | include/git2/common.h | 18 | ||||
-rw-r--r-- | src/mwindow.c | 172 | ||||
-rw-r--r-- | src/settings.c | 9 | ||||
-rw-r--r-- | tests/pack/filelimit.c | 136 |
4 files changed, 281 insertions, 54 deletions
diff --git a/include/git2/common.h b/include/git2/common.h index d6696061d..8dd30d506 100644 --- a/include/git2/common.h +++ b/include/git2/common.h @@ -205,7 +205,9 @@ typedef enum { GIT_OPT_GET_PACK_MAX_OBJECTS, GIT_OPT_SET_PACK_MAX_OBJECTS, GIT_OPT_DISABLE_PACK_KEEP_FILE_CHECKS, - GIT_OPT_ENABLE_HTTP_EXPECT_CONTINUE + GIT_OPT_ENABLE_HTTP_EXPECT_CONTINUE, + GIT_OPT_GET_MWINDOW_FILE_LIMIT, + GIT_OPT_SET_MWINDOW_FILE_LIMIT } git_libgit2_opt_t; /** @@ -227,8 +229,18 @@ typedef enum { * * * opts(GIT_OPT_SET_MWINDOW_MAPPED_LIMIT, size_t): * - * >Set the maximum amount of memory that can be mapped at any time - * by the library + * > Set the maximum amount of memory that can be mapped at any time + * > by the library + * + * * opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, size_t *): + * + * > Get the maximum number of files that will be mapped at any time by the + * > library + * + * * opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, size_t): + * + * > Set the maximum number of files that can be mapped at any time + * > by the library. The default (0) is unlimited. * * * opts(GIT_OPT_GET_SEARCH_PATH, int level, git_buf *buf) * diff --git a/src/mwindow.c b/src/mwindow.c index 262786a5f..c257f0c71 100644 --- a/src/mwindow.c +++ b/src/mwindow.c @@ -22,11 +22,15 @@ #define DEFAULT_MAPPED_LIMIT \ ((1024 * 1024) * (sizeof(void*) >= 8 ? 8192ULL : 256UL)) +/* default is unlimited */ +#define DEFAULT_FILE_LIMIT 0 + size_t git_mwindow__window_size = DEFAULT_WINDOW_SIZE; size_t git_mwindow__mapped_limit = DEFAULT_MAPPED_LIMIT; +size_t git_mwindow__file_limit = DEFAULT_FILE_LIMIT; /* Whenever you want to read or modify this, grab git__mwindow_mutex */ -static git_mwindow_ctl mem_ctl; +git_mwindow_ctl git_mwindow__mem_ctl; /* Global list of mwindow files, to open packs once across repos */ git_strmap *git__pack_cache = NULL; @@ -132,7 +136,7 @@ void git_mwindow_free_all(git_mwindow_file *mwf) */ void git_mwindow_free_all_locked(git_mwindow_file *mwf) { - git_mwindow_ctl *ctl = &mem_ctl; + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; size_t i; /* @@ -174,82 +178,143 @@ int git_mwindow_contains(git_mwindow *win, off64_t offset) && offset <= (off64_t)(win_off + win->window_map.len); } +#define GIT_MWINDOW__LRU -1 +#define GIT_MWINDOW__MRU 1 + /* - * Find the least-recently-used window in a file + * Find the least- or most-recently-used window in a file that is not currently + * being used. The 'only_unused' flag controls whether the caller requires the + * file to only have unused windows. + * + * Returns whether such a window was found in the file. */ -static void git_mwindow_scan_lru( - git_mwindow_file *mwf, - git_mwindow **lru_w, - git_mwindow **lru_l) +static bool git_mwindow_scan_recently_used( + git_mwindow_file *mwf, + git_mwindow **out_window, + git_mwindow **out_last, + bool only_unused, + int comparison_sign) { - git_mwindow *w, *w_l; - - for (w_l = NULL, w = mwf->windows; w; w = w->next) { - if (!w->inuse_cnt) { - /* - * If the current one is more recent than the last one, - * store it in the output parameter. If lru_w is NULL, - * it's the first loop, so store it as well. - */ - if (!*lru_w || w->last_used < (*lru_w)->last_used) { - *lru_w = w; - *lru_l = w_l; - } + git_mwindow *w, *w_last; + git_mwindow *lru_window = NULL, *lru_last = NULL; + + assert(mwf); + assert(out_window); + + lru_window = *out_window; + if (out_last) + lru_last = *out_last; + + for (w_last = NULL, w = mwf->windows; w; w_last = w, w = w->next) { + if (w->inuse_cnt) { + if (only_unused) + return false; + /* This window is currently being used. Skip it. */ + continue; + } + + /* + * If the current one is more (or less) recent than the last one, + * store it in the output parameter. If lru_window is NULL, + * it's the first loop, so store it as well. + */ + if (!lru_window || + (comparison_sign == GIT_MWINDOW__LRU && lru_window->last_used > w->last_used) || + (comparison_sign == GIT_MWINDOW__MRU && lru_window->last_used < w->last_used)) { + lru_window = w; + lru_last = w_last; } - w_l = w; } + + if (!lru_window && !lru_last) + return false; + + *out_window = lru_window; + if (out_last) + *out_last = lru_last; + return true; } /* - * Close the least recently used window. You should check to see if - * the file descriptors need closing from time to time. Called under - * lock from new_window. + * Close the least recently used window (that is currently not being used) out + * of all the files. Called under lock from new_window. */ -static int git_mwindow_close_lru(git_mwindow_file *mwf) +static int git_mwindow_close_lru_window(void) { - git_mwindow_ctl *ctl = &mem_ctl; + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; + git_mwindow_file *cur; size_t i; - git_mwindow *lru_w = NULL, *lru_l = NULL, **list = &mwf->windows; - - /* FIXME: Does this give us any advantage? */ - if(mwf->windows) - git_mwindow_scan_lru(mwf, &lru_w, &lru_l); + git_mwindow *lru_window = NULL, *lru_last = NULL, **list = NULL; - for (i = 0; i < ctl->windowfiles.length; ++i) { - git_mwindow *last = lru_w; - git_mwindow_file *cur = git_vector_get(&ctl->windowfiles, i); - git_mwindow_scan_lru(cur, &lru_w, &lru_l); - if (lru_w != last) + git_vector_foreach(&ctl->windowfiles, i, cur) { + if (git_mwindow_scan_recently_used( + cur, &lru_window, &lru_last, false, GIT_MWINDOW__LRU)) { list = &cur->windows; + } } - if (!lru_w) { + if (!lru_window) { git_error_set(GIT_ERROR_OS, "failed to close memory window; couldn't find LRU"); return -1; } - ctl->mapped -= lru_w->window_map.len; - git_futils_mmap_free(&lru_w->window_map); + ctl->mapped -= lru_window->window_map.len; + git_futils_mmap_free(&lru_window->window_map); - if (lru_l) - lru_l->next = lru_w->next; + if (lru_last) + lru_last->next = lru_window->next; else - *list = lru_w->next; + *list = lru_window->next; - git__free(lru_w); + git__free(lru_window); ctl->open_windows--; return 0; } +/* + * Close the file that does not have any open windows AND whose + * most-recently-used window is the least-recently used one across all + * currently open files. + * + * Called under lock from new_window. + */ +static int git_mwindow_close_lru_file(void) +{ + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; + git_mwindow_file *lru_file = NULL, *current_file = NULL; + git_mwindow *lru_window = NULL; + size_t i; + + git_vector_foreach(&ctl->windowfiles, i, current_file) { + git_mwindow *mru_window = NULL; + if (!git_mwindow_scan_recently_used( + current_file, &mru_window, NULL, true, GIT_MWINDOW__MRU)) { + continue; + } + if (!lru_window || lru_window->last_used > mru_window->last_used) + lru_file = current_file; + } + + if (!lru_file) { + git_error_set(GIT_ERROR_OS, "failed to close memory window file; couldn't find LRU"); + return -1; + } + + git_mwindow_free_all_locked(lru_file); + p_close(lru_file->fd); + lru_file->fd = -1; + + return 0; +} + /* This gets called under lock from git_mwindow_open */ static git_mwindow *new_window( - git_mwindow_file *mwf, git_file fd, off64_t size, off64_t offset) { - git_mwindow_ctl *ctl = &mem_ctl; + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; size_t walign = git_mwindow__window_size / 2; off64_t len; git_mwindow *w; @@ -269,7 +334,7 @@ static git_mwindow *new_window( ctl->mapped += (size_t)len; while (git_mwindow__mapped_limit < ctl->mapped && - git_mwindow_close_lru(mwf) == 0) /* nop */; + git_mwindow_close_lru_window() == 0) /* nop */; /* * We treat `mapped_limit` as a soft limit. If we can't find a @@ -283,7 +348,7 @@ static git_mwindow *new_window( * we're below our soft limits, so free up what we can and try again. */ - while (git_mwindow_close_lru(mwf) == 0) + while (git_mwindow_close_lru_window() == 0) /* nop */; if (git_futils_mmap_ro(&w->window_map, fd, w->offset, (size_t)len) < 0) { @@ -315,7 +380,7 @@ unsigned char *git_mwindow_open( size_t extra, unsigned int *left) { - git_mwindow_ctl *ctl = &mem_ctl; + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; git_mwindow *w = *cursor; if (git_mutex_lock(&git__mwindow_mutex)) { @@ -339,7 +404,7 @@ unsigned char *git_mwindow_open( * one. */ if (!w) { - w = new_window(mwf, mwf->fd, mwf->size, offset); + w = new_window(mwf->fd, mwf->size, offset); if (w == NULL) { git_mutex_unlock(&git__mwindow_mutex); return NULL; @@ -367,7 +432,7 @@ unsigned char *git_mwindow_open( int git_mwindow_file_register(git_mwindow_file *mwf) { - git_mwindow_ctl *ctl = &mem_ctl; + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; int ret; if (git_mutex_lock(&git__mwindow_mutex)) { @@ -381,6 +446,11 @@ int git_mwindow_file_register(git_mwindow_file *mwf) return -1; } + if (git_mwindow__file_limit) { + while (git_mwindow__file_limit <= ctl->windowfiles.length && + git_mwindow_close_lru_file() == 0) /* nop */; + } + ret = git_vector_insert(&ctl->windowfiles, mwf); git_mutex_unlock(&git__mwindow_mutex); @@ -389,7 +459,7 @@ int git_mwindow_file_register(git_mwindow_file *mwf) void git_mwindow_file_deregister(git_mwindow_file *mwf) { - git_mwindow_ctl *ctl = &mem_ctl; + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; git_mwindow_file *cur; size_t i; diff --git a/src/settings.c b/src/settings.c index f9f6b8497..69ebcb7ab 100644 --- a/src/settings.c +++ b/src/settings.c @@ -59,6 +59,7 @@ int git_libgit2_features(void) /* Declarations for tuneable settings */ extern size_t git_mwindow__window_size; extern size_t git_mwindow__mapped_limit; +extern size_t git_mwindow__file_limit; extern size_t git_indexer__max_objects; extern bool git_disable_pack_keep_file_checks; @@ -124,6 +125,14 @@ int git_libgit2_opts(int key, ...) *(va_arg(ap, size_t *)) = git_mwindow__mapped_limit; break; + case GIT_OPT_SET_MWINDOW_FILE_LIMIT: + git_mwindow__file_limit = va_arg(ap, size_t); + break; + + case GIT_OPT_GET_MWINDOW_FILE_LIMIT: + *(va_arg(ap, size_t *)) = git_mwindow__file_limit; + break; + case GIT_OPT_GET_SEARCH_PATH: if ((error = config_level_to_sysdir(va_arg(ap, int))) >= 0) { git_buf *out = va_arg(ap, git_buf *); diff --git a/tests/pack/filelimit.c b/tests/pack/filelimit.c new file mode 100644 index 000000000..044679f3b --- /dev/null +++ b/tests/pack/filelimit.c @@ -0,0 +1,136 @@ +#include "clar_libgit2.h" +#include "mwindow.h" +#include "global.h" + +#include <git2.h> +#include "git2/sys/commit.h" +#include "git2/sys/mempack.h" + +static size_t expected_open_mwindow_files = 0; +static size_t original_mwindow_file_limit = 0; + +extern git_mwindow_ctl git_mwindow__mem_ctl; + +void test_pack_filelimit__initialize_tiny(void) +{ + expected_open_mwindow_files = 1; + cl_git_pass(git_libgit2_opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit)); + cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, expected_open_mwindow_files)); +} + +void test_pack_filelimit__initialize_medium(void) +{ + expected_open_mwindow_files = 10; + cl_git_pass(git_libgit2_opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit)); + cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, expected_open_mwindow_files)); +} + +void test_pack_filelimit__initialize_unlimited(void) +{ + expected_open_mwindow_files = 15; + cl_git_pass(git_libgit2_opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit)); + cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, 0)); +} + +void test_pack_filelimit__cleanup(void) +{ + git_buf path = GIT_BUF_INIT; + cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, original_mwindow_file_limit)); + + cl_git_pass(git_buf_joinpath(&path, clar_sandbox_path(), "repo.git")); + cl_fixture_cleanup(path.ptr); + git_buf_dispose(&path); +} + +/* + * Create a packfile with one commit, one tree, and two blobs. The first blob + * (README.md) has the same content in all commits, but the second one + * (file.txt) has a different content in each commit. + */ +void create_packfile_commit( + git_repository *repo, + git_oid *out_commit_id, + git_oid *parent_id, + size_t commit_index, + size_t commit_count) +{ + git_buf file_contents = GIT_BUF_INIT; + git_treebuilder *treebuilder; + git_packbuilder *packbuilder; + git_signature *s; + git_oid oid, tree_id, commit_id; + const git_oid *parents[] = { parent_id }; + size_t parent_count = parent_id ? 1 : 0; + + cl_git_pass(git_treebuilder_new(&treebuilder, repo, NULL)); + + cl_git_pass(git_blob_create_from_buffer(&oid, repo, "", 0)); + cl_git_pass(git_treebuilder_insert(NULL, treebuilder, "README.md", &oid, 0100644)); + + cl_git_pass(git_buf_printf(&file_contents, "Commit %zd/%zd", commit_index, commit_count)); + cl_git_pass(git_blob_create_from_buffer(&oid, repo, file_contents.ptr, file_contents.size)); + cl_git_pass(git_treebuilder_insert(NULL, treebuilder, "file.txt", &oid, 0100644)); + + cl_git_pass(git_treebuilder_write(&tree_id, treebuilder)); + cl_git_pass(git_signature_now(&s, "alice", "alice@example.com")); + cl_git_pass(git_commit_create_from_ids(&commit_id, repo, "refs/heads/master", s, s, + NULL, file_contents.ptr, &tree_id, parent_count, parents)); + + cl_git_pass(git_packbuilder_new(&packbuilder, repo)); + cl_git_pass(git_packbuilder_insert_commit(packbuilder, &commit_id)); + cl_git_pass(git_packbuilder_write(packbuilder, NULL, 0, NULL, NULL)); + + cl_git_pass(git_oid_cpy(out_commit_id, &commit_id)); + + git_buf_dispose(&file_contents); + git_treebuilder_free(treebuilder); + git_packbuilder_free(packbuilder); + git_signature_free(s); +} + +void test_pack_filelimit__open_repo_with_multiple_packfiles(void) +{ + git_buf path = GIT_BUF_INIT; + git_mwindow_ctl *ctl = &git_mwindow__mem_ctl; + git_repository *repo; + git_revwalk *walk; + git_oid id, *parent_id = NULL; + size_t i; + const size_t commit_count = 16; + unsigned int open_windows; + + /* + * Create a repository and populate it with 16 commits, each in its own + * packfile. + */ + cl_git_pass(git_buf_joinpath(&path, clar_sandbox_path(), "repo.git")); + cl_git_pass(git_repository_init(&repo, path.ptr, true)); + for (i = 0; i < commit_count; ++i) { + create_packfile_commit(repo, &id, parent_id, i + 1, commit_count); + parent_id = &id; + } + + cl_git_pass(git_revwalk_new(&walk, repo)); + cl_git_pass(git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL)); + cl_git_pass(git_revwalk_push_ref(walk, "refs/heads/master")); + + /* Walking the repository requires eventually opening each of the packfiles. */ + i = 0; + while (git_revwalk_next(&id, walk) == 0) + ++i; + cl_assert_equal_i(commit_count, i); + + cl_git_pass(git_mutex_lock(&git__mwindow_mutex)); + /* + * Adding an assert while holding a lock will cause the whole process to + * deadlock. Copy the value and do the assert after releasing the lock. + */ + open_windows = ctl->open_windows; + cl_git_pass(git_mutex_unlock(&git__mwindow_mutex)); + + cl_assert_equal_i(expected_open_mwindow_files, open_windows); + + git_buf_dispose(&path); + git_revwalk_free(walk); + git_repository_free(repo); +} |