summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarlos Martín Nieto <cmn@dwim.me>2018-04-01 14:01:52 +0200
committerCarlos Martín Nieto <cmn@dwim.me>2018-04-01 14:11:34 +0200
commit2a6d0956f0fb6e4c0b9ff2455b4449701bd8e6a2 (patch)
tree598c73f0c6087b158cd714d56bba881c2788a67f
parentdc27772ce2b3d9e34c5e97fb0eb010e21c96ee33 (diff)
downloadlibgit2-2a6d0956f0fb6e4c0b9ff2455b4449701bd8e6a2.tar.gz
revwalk: avoid walking the entire history when output is unsorted
As part of reducing our divergence from git, its code for revwalk was ported into our codebase. A detail about when to limit the list was lost and we ended up always calling that code. Limiting the list means performing the walk and creating the final list of commits to be output during the preparation stage. This is unavoidable when sorting and when there are negative refs. We did this even when asked for unsorted output with no negative refs, which you might do to retrieve something like the "last 10 commits on HEAD" for a nominally unsorted meaning of "last". This commit adds and sets a flag indicating when we do need to limit the list, letting us avoid doing so when we can. The previously mentioned query thus no longer loads the entire history of the project during the prepare stage, but loads it iteratively during the walk.
-rw-r--r--src/revwalk.c66
-rw-r--r--src/revwalk.h3
2 files changed, 59 insertions, 10 deletions
diff --git a/src/revwalk.c b/src/revwalk.c
index b1aa8f91a..d913987b7 100644
--- a/src/revwalk.c
+++ b/src/revwalk.c
@@ -15,6 +15,8 @@
#include "merge.h"
#include "vector.h"
+static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list);
+
git_commit_list_node *git_revwalk__commit_lookup(
git_revwalk *walk, const git_oid *oid)
{
@@ -76,10 +78,12 @@ static int push_commit(git_revwalk *walk, const git_oid *oid, int uninteresting,
if (commit->uninteresting)
return 0;
- if (uninteresting)
+ if (uninteresting) {
+ walk->limited = 1;
walk->did_hide = 1;
- else
+ } else {
walk->did_push = 1;
+ }
commit->uninteresting = uninteresting;
list = walk->user_input;
@@ -245,9 +249,10 @@ static int revwalk_next_timesort(git_commit_list_node **object_out, git_revwalk
static int revwalk_next_unsorted(git_commit_list_node **object_out, git_revwalk *walk)
{
+ int error;
git_commit_list_node *next;
- while ((next = git_commit_list_pop(&walk->iterator_rand)) != NULL) {
+ while (!(error = get_revision(&next, walk, &walk->iterator_rand))) {
/* Some commits might become uninteresting after being added to the list */
if (!next->uninteresting) {
*object_out = next;
@@ -255,15 +260,15 @@ static int revwalk_next_unsorted(git_commit_list_node **object_out, git_revwalk
}
}
- giterr_clear();
- return GIT_ITEROVER;
+ return error;
}
static int revwalk_next_toposort(git_commit_list_node **object_out, git_revwalk *walk)
{
+ int error;
git_commit_list_node *next;
- while ((next = git_commit_list_pop(&walk->iterator_topo)) != NULL) {
+ while (!(error = get_revision(&next, walk, &walk->iterator_topo))) {
/* Some commits might become uninteresting after being added to the list */
if (!next->uninteresting) {
*object_out = next;
@@ -271,8 +276,7 @@ static int revwalk_next_toposort(git_commit_list_node **object_out, git_revwalk
}
}
- giterr_clear();
- return GIT_ITEROVER;
+ return error;
}
static int revwalk_next_reverse(git_commit_list_node **object_out, git_revwalk *walk)
@@ -449,6 +453,45 @@ static int limit_list(git_commit_list **out, git_revwalk *walk, git_commit_list
return 0;
}
+static int get_one_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
+{
+ int error;
+ git_commit_list_node *commit;
+
+ while(true) {
+ commit = git_commit_list_pop(list);
+ if (!commit) {
+ giterr_clear();
+ return GIT_ITEROVER;
+ }
+
+ /*
+ * If we did not run limit_list and we must add parents to the
+ * list ourselves.
+ */
+ if (!walk->limited) {
+ if ((error = add_parents_to_list(walk, commit, list)) < 0)
+ return error;
+ }
+
+ *out = commit;
+ return 0;
+ }
+}
+
+static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
+{
+ int error;
+ git_commit_list_node *commit;
+
+ if ((error = get_one_revision(&commit, walk, list)) < 0)
+ return error;
+
+ /* Here is where we would handle boundary commits if we implement that */
+ *out = commit;
+ return 0;
+}
+
static int sort_in_topological_order(git_commit_list **out, git_revwalk *walk, git_commit_list *list)
{
git_commit_list *ll = NULL, *newlist, **pptr;
@@ -561,7 +604,7 @@ static int prepare_walk(git_revwalk *walk)
}
}
- if ((error = limit_list(&commits, walk, commits)) < 0)
+ if (walk->limited && (error = limit_list(&commits, walk, commits)) < 0)
return error;
if (walk->sorting & GIT_SORT_TOPOLOGICAL) {
@@ -664,6 +707,9 @@ void git_revwalk_sorting(git_revwalk *walk, unsigned int sort_mode)
walk->get_next = &revwalk_next_unsorted;
walk->enqueue = &revwalk_enqueue_unsorted;
}
+
+ if (sort_mode != GIT_SORT_NONE)
+ walk->limited = 1;
}
void git_revwalk_simplify_first_parent(git_revwalk *walk)
@@ -719,6 +765,7 @@ void git_revwalk_reset(git_revwalk *walk)
git_commit_list_free(&walk->user_input);
walk->first_parent = 0;
walk->walking = 0;
+ walk->limited = 0;
walk->did_push = walk->did_hide = 0;
}
@@ -740,6 +787,7 @@ int git_revwalk_add_hide_cb(
walk->hide_cb = hide_cb;
walk->hide_cb_payload = payload;
+ walk->limited = 1;
return 0;
}
diff --git a/src/revwalk.h b/src/revwalk.h
index 578328b72..923a2bc80 100644
--- a/src/revwalk.h
+++ b/src/revwalk.h
@@ -36,7 +36,8 @@ struct git_revwalk {
unsigned walking:1,
first_parent: 1,
did_hide: 1,
- did_push: 1;
+ did_push: 1,
+ limited: 1;
unsigned int sorting;
/* the pushes and hides */