summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Edward Thomson <ethomson@edwardthomson.com> 2020-07-12 21:26:59 +0100
committer: GitHub <noreply@github.com> 2020-07-12 21:26:59 +0100
commit: a83fd5107879d18b31ff8173ea062136256321be (patch)
tree: 665e1e0b48516558ee88fc28feb82dbb0758e42b
parent: 26b9e489c103ad6768708feb23d9844e202766fb (diff)
parent: 92d42eb3d83a28febbbb50df7c398e32677da28a (diff)
download: libgit2-a83fd5107879d18b31ff8173ea062136256321be.tar.gz
Merge pull request #5396 from lhchavez/mwindow-file-limit
mwindow: set limit on number of open files
-rw-r--r-- include/git2/common.h 18
-rw-r--r-- src/mwindow.c 172
-rw-r--r-- src/settings.c 9
-rw-r--r-- tests/pack/filelimit.c 136
4 files changed, 281 insertions, 54 deletions
diff --git a/include/git2/common.h b/include/git2/common.h
index d6696061d..8dd30d506 100644
--- a/include/git2/common.h
+++ b/include/git2/common.h
@@ -205,7 +205,9 @@ typedef enum {
GIT_OPT_GET_PACK_MAX_OBJECTS,
GIT_OPT_SET_PACK_MAX_OBJECTS,
GIT_OPT_DISABLE_PACK_KEEP_FILE_CHECKS,
- GIT_OPT_ENABLE_HTTP_EXPECT_CONTINUE
+ GIT_OPT_ENABLE_HTTP_EXPECT_CONTINUE,
+ GIT_OPT_GET_MWINDOW_FILE_LIMIT,
+ GIT_OPT_SET_MWINDOW_FILE_LIMIT
} git_libgit2_opt_t;
/**
@@ -227,8 +229,18 @@ typedef enum {
*
* * opts(GIT_OPT_SET_MWINDOW_MAPPED_LIMIT, size_t):
*
- * >Set the maximum amount of memory that can be mapped at any time
- * by the library
+ * > Set the maximum amount of memory that can be mapped at any time
+ * > by the library
+ *
+ * * opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, size_t *):
+ *
+ * > Get the maximum number of files that can be mapped at any time by the
+ * > library. A value of 0 means unlimited.
+ *
+ * * opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, size_t):
+ *
+ * > Set the maximum number of files that can be mapped at any time
+ * > by the library. The default (0) is unlimited.
*
* * opts(GIT_OPT_GET_SEARCH_PATH, int level, git_buf *buf)
*
diff --git a/src/mwindow.c b/src/mwindow.c
index 262786a5f..c257f0c71 100644
--- a/src/mwindow.c
+++ b/src/mwindow.c
@@ -22,11 +22,15 @@
#define DEFAULT_MAPPED_LIMIT \
((1024 * 1024) * (sizeof(void*) >= 8 ? 8192ULL : 256UL))
+/* Default limit on the number of open window files; 0 means unlimited */
+#define DEFAULT_FILE_LIMIT 0
+
size_t git_mwindow__window_size = DEFAULT_WINDOW_SIZE;
size_t git_mwindow__mapped_limit = DEFAULT_MAPPED_LIMIT;
+size_t git_mwindow__file_limit = DEFAULT_FILE_LIMIT;
/* Whenever you want to read or modify this, grab git__mwindow_mutex */
-static git_mwindow_ctl mem_ctl;
+git_mwindow_ctl git_mwindow__mem_ctl;
/* Global list of mwindow files, to open packs once across repos */
git_strmap *git__pack_cache = NULL;
@@ -132,7 +136,7 @@ void git_mwindow_free_all(git_mwindow_file *mwf)
*/
void git_mwindow_free_all_locked(git_mwindow_file *mwf)
{
- git_mwindow_ctl *ctl = &mem_ctl;
+ git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
size_t i;
/*
@@ -174,82 +178,143 @@ int git_mwindow_contains(git_mwindow *win, off64_t offset)
&& offset <= (off64_t)(win_off + win->window_map.len);
}
+#define GIT_MWINDOW__LRU -1
+#define GIT_MWINDOW__MRU 1
+
/*
- * Find the least-recently-used window in a file
+ * Find the least- or most-recently-used window in a file that is not currently
+ * being used, and report whether this file improved on the caller's candidate.
+ *
+ * mwf:             file whose window list is scanned.
+ * out_window:      in/out; the best candidate so far (NULL if none). Replaced
+ *                  only when a window of `mwf` beats it.
+ * out_last:        optional in/out; the list predecessor of `*out_window`
+ *                  (NULL when the candidate is the head of its list).
+ * only_unused:     when true, give up as soon as any window of `mwf` is in
+ *                  use, leaving the outputs untouched.
+ * comparison_sign: GIT_MWINDOW__LRU to look for the smallest `last_used`,
+ *                  GIT_MWINDOW__MRU to look for the largest.
+ *
+ * Returns true only if a window belonging to `mwf` was stored in
+ * `*out_window`. A candidate carried over from an earlier call does not count,
+ * so callers can rely on the result to know which file owns the window.
 */
-static void git_mwindow_scan_lru(
-	git_mwindow_file *mwf,
-	git_mwindow **lru_w,
-	git_mwindow **lru_l)
+static bool git_mwindow_scan_recently_used(
+	git_mwindow_file *mwf,
+	git_mwindow **out_window,
+	git_mwindow **out_last,
+	bool only_unused,
+	int comparison_sign)
{
-	git_mwindow *w, *w_l;
-
-	for (w_l = NULL, w = mwf->windows; w; w = w->next) {
-		if (!w->inuse_cnt) {
-			/*
-			 * If the current one is more recent than the last one,
-			 * store it in the output parameter. If lru_w is NULL,
-			 * it's the first loop, so store it as well.
-			 */
-			if (!*lru_w || w->last_used < (*lru_w)->last_used) {
-				*lru_w = w;
-				*lru_l = w_l;
-			}
+	git_mwindow *w, *w_last;
+	git_mwindow *lru_window = NULL, *lru_last = NULL;
+	/* Set once a window from *this* file replaces the incoming candidate. */
+	bool found = false;
+
+	assert(mwf);
+	assert(out_window);
+
+	lru_window = *out_window;
+	if (out_last)
+		lru_last = *out_last;
+
+	for (w_last = NULL, w = mwf->windows; w; w_last = w, w = w->next) {
+		if (w->inuse_cnt) {
+			if (only_unused)
+				return false;
+			/* This window is currently being used. Skip it. */
+			continue;
+		}
+
+		/*
+		 * If the current one is more (or less) recent than the last one,
+		 * store it in the output parameter. If lru_window is NULL,
+		 * it's the first loop, so store it as well.
+		 */
+		if (!lru_window ||
+				(comparison_sign == GIT_MWINDOW__LRU && lru_window->last_used > w->last_used) ||
+				(comparison_sign == GIT_MWINDOW__MRU && lru_window->last_used < w->last_used)) {
+			lru_window = w;
+			lru_last = w_last;
+			found = true;
		}
-		w_l = w;
	}
+
+	/*
+	 * Nothing in this file beat the incoming candidate (or there was no
+	 * unused window at all): leave the caller's values as they were.
+	 */
+	if (!found)
+		return false;
+
+	*out_window = lru_window;
+	if (out_last)
+		*out_last = lru_last;
+	return true;
}
/*
- * Close the least recently used window. You should check to see if
- * the file descriptors need closing from time to time. Called under
- * lock from new_window.
+ * Close the least recently used window (that is currently not being used) out
+ * of all the files. Called under lock from new_window.
+ *
+ * On success the window is unmapped, unlinked from its file's list and freed,
+ * the global `mapped` and `open_windows` counters are adjusted, and 0 is
+ * returned. If no registered file has an unused window, an error is set and
+ * -1 is returned.
 */
-static int git_mwindow_close_lru(git_mwindow_file *mwf)
+static int git_mwindow_close_lru_window(void)
{
-	git_mwindow_ctl *ctl = &mem_ctl;
+	git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
+	git_mwindow_file *cur;
	size_t i;
-	git_mwindow *lru_w = NULL, *lru_l = NULL, **list = &mwf->windows;
-
-	/* FIXME: Does this give us any advantage? */
-	if(mwf->windows)
-		git_mwindow_scan_lru(mwf, &lru_w, &lru_l);
+	git_mwindow *lru_window = NULL, *lru_last = NULL, **list = NULL;
-	for (i = 0; i < ctl->windowfiles.length; ++i) {
-		git_mwindow *last = lru_w;
-		git_mwindow_file *cur = git_vector_get(&ctl->windowfiles, i);
-		git_mwindow_scan_lru(cur, &lru_w, &lru_l);
-		if (lru_w != last)
+	git_vector_foreach(&ctl->windowfiles, i, cur) {
+		git_mwindow *previous = lru_window;
+
+		git_mwindow_scan_recently_used(
+			cur, &lru_window, &lru_last, false, GIT_MWINDOW__LRU);
+
+		/*
+		 * Only remember this file's list head when the scan actually
+		 * replaced the candidate with one of this file's windows. The
+		 * head patched below must belong to the file that owns the
+		 * window, not merely to the last file scanned.
+		 */
+		if (lru_window != previous)
			list = &cur->windows;
	}
-	if (!lru_w) {
+	if (!lru_window) {
		git_error_set(GIT_ERROR_OS, "failed to close memory window; couldn't find LRU");
		return -1;
	}
-	ctl->mapped -= lru_w->window_map.len;
-	git_futils_mmap_free(&lru_w->window_map);
+	ctl->mapped -= lru_window->window_map.len;
+	git_futils_mmap_free(&lru_window->window_map);
-	if (lru_l)
-		lru_l->next = lru_w->next;
+	if (lru_last)
+		lru_last->next = lru_window->next;
	else
-		*list = lru_w->next;
+		*list = lru_window->next;
-	git__free(lru_w);
+	git__free(lru_window);
	ctl->open_windows--;
	return 0;
}
+/*
+ * Close the least-recently-used file among those that have at least one
+ * window and no window currently in use. A file's recency is that of its
+ * most-recently-used window; the file whose most-recently-used window is the
+ * oldest across all candidates is the one that gets closed.
+ *
+ * Called under lock from git_mwindow_file_register.
+ *
+ * Returns 0 after freeing the file's windows and closing its descriptor
+ * (`fd` is reset to -1), or -1 with an error set when no file qualifies.
+ */
+static int git_mwindow_close_lru_file(void)
+{
+	git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
+	git_mwindow_file *lru_file = NULL, *current_file = NULL;
+	git_mwindow *lru_window = NULL;
+	size_t i;
+
+	git_vector_foreach(&ctl->windowfiles, i, current_file) {
+		git_mwindow *mru_window = NULL;
+		if (!git_mwindow_scan_recently_used(
+				current_file, &mru_window, NULL, true, GIT_MWINDOW__MRU)) {
+			continue;
+		}
+		/*
+		 * Keep the winning window together with its file; without
+		 * recording it, every later candidate would win the comparison
+		 * and the last eligible file would be closed instead of the LRU.
+		 */
+		if (!lru_window || lru_window->last_used > mru_window->last_used) {
+			lru_window = mru_window;
+			lru_file = current_file;
+		}
+	}
+
+	if (!lru_file) {
+		git_error_set(GIT_ERROR_OS, "failed to close memory window file; couldn't find LRU");
+		return -1;
+	}
+
+	git_mwindow_free_all_locked(lru_file);
+	p_close(lru_file->fd);
+	lru_file->fd = -1;
+
+	return 0;
+}
+
/* This gets called under lock from git_mwindow_open */
static git_mwindow *new_window(
- git_mwindow_file *mwf,
git_file fd,
off64_t size,
off64_t offset)
{
- git_mwindow_ctl *ctl = &mem_ctl;
+ git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
size_t walign = git_mwindow__window_size / 2;
off64_t len;
git_mwindow *w;
@@ -269,7 +334,7 @@ static git_mwindow *new_window(
ctl->mapped += (size_t)len;
while (git_mwindow__mapped_limit < ctl->mapped &&
- git_mwindow_close_lru(mwf) == 0) /* nop */;
+ git_mwindow_close_lru_window() == 0) /* nop */;
/*
* We treat `mapped_limit` as a soft limit. If we can't find a
@@ -283,7 +348,7 @@ static git_mwindow *new_window(
* we're below our soft limits, so free up what we can and try again.
*/
- while (git_mwindow_close_lru(mwf) == 0)
+ while (git_mwindow_close_lru_window() == 0)
/* nop */;
if (git_futils_mmap_ro(&w->window_map, fd, w->offset, (size_t)len) < 0) {
@@ -315,7 +380,7 @@ unsigned char *git_mwindow_open(
size_t extra,
unsigned int *left)
{
- git_mwindow_ctl *ctl = &mem_ctl;
+ git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
git_mwindow *w = *cursor;
if (git_mutex_lock(&git__mwindow_mutex)) {
@@ -339,7 +404,7 @@ unsigned char *git_mwindow_open(
* one.
*/
if (!w) {
- w = new_window(mwf, mwf->fd, mwf->size, offset);
+ w = new_window(mwf->fd, mwf->size, offset);
if (w == NULL) {
git_mutex_unlock(&git__mwindow_mutex);
return NULL;
@@ -367,7 +432,7 @@ unsigned char *git_mwindow_open(
int git_mwindow_file_register(git_mwindow_file *mwf)
{
- git_mwindow_ctl *ctl = &mem_ctl;
+ git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
int ret;
if (git_mutex_lock(&git__mwindow_mutex)) {
@@ -381,6 +446,11 @@ int git_mwindow_file_register(git_mwindow_file *mwf)
return -1;
}
+ if (git_mwindow__file_limit) {
+ while (git_mwindow__file_limit <= ctl->windowfiles.length &&
+ git_mwindow_close_lru_file() == 0) /* nop */;
+ }
+
ret = git_vector_insert(&ctl->windowfiles, mwf);
git_mutex_unlock(&git__mwindow_mutex);
@@ -389,7 +459,7 @@ int git_mwindow_file_register(git_mwindow_file *mwf)
void git_mwindow_file_deregister(git_mwindow_file *mwf)
{
- git_mwindow_ctl *ctl = &mem_ctl;
+ git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
git_mwindow_file *cur;
size_t i;
diff --git a/src/settings.c b/src/settings.c
index f9f6b8497..69ebcb7ab 100644
--- a/src/settings.c
+++ b/src/settings.c
@@ -59,6 +59,7 @@ int git_libgit2_features(void)
/* Declarations for tuneable settings */
extern size_t git_mwindow__window_size;
extern size_t git_mwindow__mapped_limit;
+extern size_t git_mwindow__file_limit;
extern size_t git_indexer__max_objects;
extern bool git_disable_pack_keep_file_checks;
@@ -124,6 +125,14 @@ int git_libgit2_opts(int key, ...)
*(va_arg(ap, size_t *)) = git_mwindow__mapped_limit;
break;
+ case GIT_OPT_SET_MWINDOW_FILE_LIMIT:
+ git_mwindow__file_limit = va_arg(ap, size_t);
+ break;
+
+ case GIT_OPT_GET_MWINDOW_FILE_LIMIT:
+ *(va_arg(ap, size_t *)) = git_mwindow__file_limit;
+ break;
+
case GIT_OPT_GET_SEARCH_PATH:
if ((error = config_level_to_sysdir(va_arg(ap, int))) >= 0) {
git_buf *out = va_arg(ap, git_buf *);
diff --git a/tests/pack/filelimit.c b/tests/pack/filelimit.c
new file mode 100644
index 000000000..044679f3b
--- /dev/null
+++ b/tests/pack/filelimit.c
@@ -0,0 +1,136 @@
+#include "clar_libgit2.h"
+#include "mwindow.h"
+#include "global.h"
+
+#include <git2.h>
+#include "git2/sys/commit.h"
+#include "git2/sys/mempack.h"
+
+static size_t expected_open_mwindow_files = 0; /* value the test asserts against; set by each initializer */
+static size_t original_mwindow_file_limit = 0; /* limit read by the initializer and restored in cleanup */
+
+extern git_mwindow_ctl git_mwindow__mem_ctl; /* global mwindow state defined in src/mwindow.c */
+
+/* Setup variant: save the current file limit, then cap it at a single file. */
+void test_pack_filelimit__initialize_tiny(void)
+{
+	const size_t limit = 1;
+
+	expected_open_mwindow_files = limit;
+	cl_git_pass(git_libgit2_opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit));
+	cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, limit));
+}
+
+/* Setup variant: save the current file limit, then cap it at ten files. */
+void test_pack_filelimit__initialize_medium(void)
+{
+	const size_t limit = 10;
+
+	expected_open_mwindow_files = limit;
+	cl_git_pass(git_libgit2_opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit));
+	cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, limit));
+}
+
+/*
+ * Setup variant: save the current file limit, then remove the limit (0 means
+ * unlimited). The test expectation for this variant is set to 15.
+ */
+void test_pack_filelimit__initialize_unlimited(void)
+{
+	expected_open_mwindow_files = 15;
+	cl_git_pass(git_libgit2_opts(GIT_OPT_GET_MWINDOW_FILE_LIMIT, &original_mwindow_file_limit));
+	/*
+	 * The option is read back with va_arg(ap, size_t), so the variadic
+	 * argument must really be a size_t; a bare `0` would be passed as int.
+	 */
+	cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, (size_t)0));
+}
+
+/* Teardown: restore the saved file limit and remove the sandbox repository. */
+void test_pack_filelimit__cleanup(void)
+{
+	git_buf repo_path = GIT_BUF_INIT;
+
+	cl_git_pass(git_libgit2_opts(GIT_OPT_SET_MWINDOW_FILE_LIMIT, original_mwindow_file_limit));
+
+	cl_git_pass(git_buf_joinpath(&repo_path, clar_sandbox_path(), "repo.git"));
+	cl_fixture_cleanup(repo_path.ptr);
+	git_buf_dispose(&repo_path);
+}
+
+/*
+ * Create a packfile with one commit, one tree, and two blobs. The first blob
+ * (README.md) has the same content in all commits, but the second one
+ * (file.txt) has a different content in each commit.
+ *
+ * repo:          repository that receives the objects and the packfile.
+ * out_commit_id: receives the id of the newly created commit.
+ * parent_id:     id of the parent commit, or NULL for a root commit. It may
+ *                point at the same storage as `out_commit_id`; it is consumed
+ *                before the output is written.
+ * commit_index:  1-based position of this commit, used in the file contents
+ *                and the commit message.
+ * commit_count:  total number of commits, used in the same text.
+ */
+void create_packfile_commit(
+	git_repository *repo,
+	git_oid *out_commit_id,
+	git_oid *parent_id,
+	size_t commit_index,
+	size_t commit_count)
+{
+	git_buf file_contents = GIT_BUF_INIT;
+	git_treebuilder *treebuilder;
+	git_packbuilder *packbuilder;
+	git_signature *s;
+	git_oid oid, tree_id, commit_id;
+	const git_oid *parents[] = { parent_id };
+	size_t parent_count = parent_id ? 1 : 0;
+
+	cl_git_pass(git_treebuilder_new(&treebuilder, repo, NULL));
+
+	cl_git_pass(git_blob_create_from_buffer(&oid, repo, "", 0));
+	cl_git_pass(git_treebuilder_insert(NULL, treebuilder, "README.md", &oid, 0100644));
+
+	/* Both counters are size_t, so they need the unsigned `%zu` conversion. */
+	cl_git_pass(git_buf_printf(&file_contents, "Commit %zu/%zu", commit_index, commit_count));
+	cl_git_pass(git_blob_create_from_buffer(&oid, repo, file_contents.ptr, file_contents.size));
+	cl_git_pass(git_treebuilder_insert(NULL, treebuilder, "file.txt", &oid, 0100644));
+
+	cl_git_pass(git_treebuilder_write(&tree_id, treebuilder));
+	cl_git_pass(git_signature_now(&s, "alice", "alice@example.com"));
+	cl_git_pass(git_commit_create_from_ids(&commit_id, repo, "refs/heads/master", s, s,
+		NULL, file_contents.ptr, &tree_id, parent_count, parents));
+
+	/* Write a pack that is built from this one commit only. */
+	cl_git_pass(git_packbuilder_new(&packbuilder, repo));
+	cl_git_pass(git_packbuilder_insert_commit(packbuilder, &commit_id));
+	cl_git_pass(git_packbuilder_write(packbuilder, NULL, 0, NULL, NULL));
+
+	cl_git_pass(git_oid_cpy(out_commit_id, &commit_id));
+
+	git_buf_dispose(&file_contents);
+	git_treebuilder_free(treebuilder);
+	git_packbuilder_free(packbuilder);
+	git_signature_free(s);
+}
+
+/*
+ * Build a bare repository whose history is spread over 16 packfiles, walk the
+ * whole history, then compare the global open-window counter with the value
+ * chosen by the active initializer.
+ */
+void test_pack_filelimit__open_repo_with_multiple_packfiles(void)
+{
+	const size_t commit_count = 16;
+	git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
+	git_buf repo_path = GIT_BUF_INIT;
+	git_repository *repo;
+	git_revwalk *walk;
+	git_oid id, *parent_id = NULL;
+	unsigned int open_windows;
+	size_t i;
+
+	/* One commit per packfile; each commit is the parent of the next. */
+	cl_git_pass(git_buf_joinpath(&repo_path, clar_sandbox_path(), "repo.git"));
+	cl_git_pass(git_repository_init(&repo, repo_path.ptr, true));
+	for (i = 1; i <= commit_count; ++i) {
+		create_packfile_commit(repo, &id, parent_id, i, commit_count);
+		parent_id = &id;
+	}
+
+	cl_git_pass(git_revwalk_new(&walk, repo));
+	cl_git_pass(git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL));
+	cl_git_pass(git_revwalk_push_ref(walk, "refs/heads/master"));
+
+	/* Walking the repository requires eventually opening each of the packfiles. */
+	for (i = 0; git_revwalk_next(&id, walk) == 0; ++i)
+		/* nop */;
+	cl_assert_equal_i(commit_count, i);
+
+	/*
+	 * A failing assert while the mutex is held would deadlock the whole
+	 * process, so snapshot the counter under the lock and check it only
+	 * after the lock has been released.
+	 */
+	cl_git_pass(git_mutex_lock(&git__mwindow_mutex));
+	open_windows = ctl->open_windows;
+	cl_git_pass(git_mutex_unlock(&git__mwindow_mutex));
+
+	cl_assert_equal_i(expected_open_mwindow_files, open_windows);
+
+	git_buf_dispose(&repo_path);
+	git_revwalk_free(walk);
+	git_repository_free(repo);
+}