summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2011-03-08 14:57:03 +0200
committerVicent Marti <tanoku@gmail.com>2011-03-14 23:52:15 +0200
commit71db842fac3ba8582255bc5b61361ddef08ef105 (patch)
treef4c4efa3860da60ef0f3e86c5ab6c1b4dad2d1fc
parent26022f0719f652ae8311abea6f6de92bd4a75a87 (diff)
downloadlibgit2-71db842fac3ba8582255bc5b61361ddef08ef105.tar.gz
Rewrite the Revision Walker
The new revision walker uses an internal commit object storage system, a custom memory allocator, and much improved topological and time sorting algorithms. It is about 20 times faster than the previous implementation when browsing big repositories. The following external API calls have changed: `git_revwalk_next` now returns an OID instead of a full commit object. The initial call to `git_revwalk_next` is no longer blocking when iterating through a repo with a time-sorting mode. Iterating with topological or reverse modes still makes the initial call block while the commit list is preprocessed, but this delay should be unnoticeable on most repositories (topological preprocessing takes about 0.3s on the git.git repo). `git_revwalk_push` and `git_revwalk_hide` now take an OID instead of a full commit object.
-rw-r--r--include/git2/revwalk.h8
-rw-r--r--src/pqueue.c153
-rw-r--r--src/pqueue.h92
-rw-r--r--src/repository.c2
-rw-r--r--src/revwalk.c618
-rw-r--r--src/revwalk.h56
-rw-r--r--src/util.h2
-rw-r--r--tests/t05-revwalk.c109
8 files changed, 636 insertions, 404 deletions
diff --git a/include/git2/revwalk.h b/include/git2/revwalk.h
index 841110499..fdbbe236c 100644
--- a/include/git2/revwalk.h
+++ b/include/git2/revwalk.h
@@ -27,6 +27,7 @@
#include "common.h"
#include "types.h"
+#include "object.h"
/**
* @file git2/revwalk.h
@@ -88,14 +89,15 @@ GIT_EXTERN(void) git_revwalk_reset(git_revwalk *walker);
* @param walker the walker being used for the traversal.
* @param commit the commit to start from.
*/
-GIT_EXTERN(int) git_revwalk_push(git_revwalk *walk, git_commit *commit);
+GIT_EXTERN(int) git_revwalk_push(git_revwalk *walk, const git_oid *oid);
+
/**
* Mark a commit (and its ancestors) uninteresting for the output.
* @param walker the walker being used for the traversal.
* @param commit the commit that will be ignored during the traversal
*/
-GIT_EXTERN(int) git_revwalk_hide(git_revwalk *walk, git_commit *commit);
+GIT_EXTERN(int) git_revwalk_hide(git_revwalk *walk, const git_oid *oid);
/**
* Get the next commit from the revision traversal.
@@ -105,7 +107,7 @@ GIT_EXTERN(int) git_revwalk_hide(git_revwalk *walk, git_commit *commit);
* @return GIT_SUCCESS if the next commit was found;
* GIT_EREVWALKOVER if there are no commits left to iterate
*/
-GIT_EXTERN(int) git_revwalk_next(git_commit **commit, git_revwalk *walk);
+GIT_EXTERN(int) git_revwalk_next(git_oid *oid, git_revwalk *walk);
/**
* Change the sorting mode when iterating through the
diff --git a/src/pqueue.c b/src/pqueue.c
new file mode 100644
index 000000000..98152cb85
--- /dev/null
+++ b/src/pqueue.c
@@ -0,0 +1,153 @@
+/*
+ * BORING COPYRIGHT NOTICE:
+ *
+ * This file is a heavily modified version of the priority queue found
+ * in the Apache project and the libpqueue library.
+ *
+ * https://github.com/vy/libpqueue
+ *
+ * These are the original authors:
+ *
+ * Copyright 2010 Volkan Yazıcı <volkan.yazici@gmail.com>
+ * Copyright 2006-2010 The Apache Software Foundation
+ *
+ * This file is licensed under the Apache 2.0 license, which
+ * supposedly makes it compatible with the GPLv2 that libgit2 uses.
+ *
+ * Check the Apache license at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * So much licensing trouble for a binary heap. Oh well.
+ */
+
+#include "common.h"
+#include "pqueue.h"
+
+#define left(i) ((i) << 1)
+#define right(i) (((i) << 1) + 1)
+#define parent(i) ((i) >> 1)
+
+int git_pqueue_init(git_pqueue *q, size_t n, git_pqueue_cmp cmppri)
+{
+ assert(q);
+
+ /* Need to allocate n+1 elements since element 0 isn't used. */
+ if ((q->d = malloc((n + 1) * sizeof(void *))) == NULL)
+ return GIT_ENOMEM;
+
+ q->size = 1;
+ q->avail = q->step = (n + 1); /* see comment above about n+1 */
+ q->cmppri = cmppri;
+
+ return GIT_SUCCESS;
+}
+
+
+void git_pqueue_free(git_pqueue *q)
+{
+ free(q->d);
+ q->d = NULL;
+}
+
+
+size_t git_pqueue_size(git_pqueue *q)
+{
+ /* queue element 0 exists but doesn't count since it isn't used. */
+ return (q->size - 1);
+}
+
+
+static void bubble_up(git_pqueue *q, size_t i)
+{
+ size_t parent_node;
+ void *moving_node = q->d[i];
+
+ for (parent_node = parent(i);
+ ((i > 1) && q->cmppri(q->d[parent_node], moving_node));
+ i = parent_node, parent_node = parent(i)) {
+ q->d[i] = q->d[parent_node];
+ }
+
+ q->d[i] = moving_node;
+}
+
+
+static size_t maxchild(git_pqueue *q, size_t i)
+{
+ size_t child_node = left(i);
+
+ if (child_node >= q->size)
+ return 0;
+
+ if ((child_node + 1) < q->size &&
+ q->cmppri(q->d[child_node], q->d[child_node + 1]))
+ child_node++; /* use right child instead of left */
+
+ return child_node;
+}
+
+
+static void percolate_down(git_pqueue *q, size_t i)
+{
+ size_t child_node;
+ void *moving_node = q->d[i];
+
+ while ((child_node = maxchild(q, i)) != 0 &&
+ q->cmppri(moving_node, q->d[child_node])) {
+ q->d[i] = q->d[child_node];
+ i = child_node;
+ }
+
+ q->d[i] = moving_node;
+}
+
+
+int git_pqueue_insert(git_pqueue *q, void *d)
+{
+ void *tmp;
+ size_t i;
+ size_t newsize;
+
+ if (!q) return 1;
+
+ /* allocate more memory if necessary */
+ if (q->size >= q->avail) {
+ newsize = q->size + q->step;
+ if ((tmp = realloc(q->d, sizeof(void *) * newsize)) == NULL)
+ return GIT_ENOMEM;
+
+ q->d = tmp;
+ q->avail = newsize;
+ }
+
+ /* insert item */
+ i = q->size++;
+ q->d[i] = d;
+ bubble_up(q, i);
+
+ return GIT_SUCCESS;
+}
+
+
+void *git_pqueue_pop(git_pqueue *q)
+{
+ void *head;
+
+ if (!q || q->size == 1)
+ return NULL;
+
+ head = q->d[1];
+ q->d[1] = q->d[--q->size];
+ percolate_down(q, 1);
+
+ return head;
+}
+
+
+void *git_pqueue_peek(git_pqueue *q)
+{
+ if (!q || q->size == 1)
+ return NULL;
+ return q->d[1];
+}
diff --git a/src/pqueue.h b/src/pqueue.h
new file mode 100644
index 000000000..6db74661d
--- /dev/null
+++ b/src/pqueue.h
@@ -0,0 +1,92 @@
+/*
+ * BORING COPYRIGHT NOTICE:
+ *
+ * This file is a heavily modified version of the priority queue found
+ * in the Apache project and the libpqueue library.
+ *
+ * https://github.com/vy/libpqueue
+ *
+ * These are the original authors:
+ *
+ * Copyright 2010 Volkan Yazıcı <volkan.yazici@gmail.com>
+ * Copyright 2006-2010 The Apache Software Foundation
+ *
+ * This file is licensed under the Apache 2.0 license, which
+ * supposedly makes it compatible with the GPLv2 that libgit2 uses.
+ *
+ * Check the Apache license at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * So much licensing trouble for a binary heap. Oh well.
+ */
+
+#ifndef INCLUDE_pqueue_h__
+#define INCLUDE_pqueue_h__
+
+/** comparison callback: returns non-zero when `a` ranks lower than `b` */
+typedef int (*git_pqueue_cmp)(void *a, void *b);
+
+/** the priority queue handle */
+typedef struct {
+ size_t size, avail, step;
+ git_pqueue_cmp cmppri;
+ void **d;
+} git_pqueue;
+
+
+/**
+ * initialize the queue
+ *
+ * @param n the initial estimate of the number of queue items for which memory
+ * should be preallocated
+ * @param cmppri the callback function to compare two nodes of the queue
+ *
+ * @return GIT_SUCCESS on success, GIT_ENOMEM if the allocation fails
+ */
+int git_pqueue_init(git_pqueue *q, size_t n, git_pqueue_cmp cmppri);
+
+
+/**
+ * free all memory used by the queue
+ * @param q the queue
+ */
+void git_pqueue_free(git_pqueue *q);
+
+
+/**
+ * return the size of the queue.
+ * @param q the queue
+ */
+size_t git_pqueue_size(git_pqueue *q);
+
+
+/**
+ * insert an item into the queue.
+ * @param q the queue
+ * @param d the item
+ * @return 0 on success
+ */
+int git_pqueue_insert(git_pqueue *q, void *d);
+
+
+/**
+ * pop the highest-ranking item from the queue.
+ * @param q the queue
+ * @return NULL if the queue is NULL or empty, otherwise the entry,
+ *         which is removed from the queue
+ */
+void *git_pqueue_pop(git_pqueue *q);
+
+
+/**
+ * access highest-ranking item without removing it.
+ * @param q the queue
+ * @return NULL if the queue is NULL or empty, otherwise the entry,
+ *         which stays in the queue
+ */
+void *git_pqueue_peek(git_pqueue *q);
+
+#endif /* INCLUDE_pqueue_h__ */
+/** @} */
+
diff --git a/src/repository.c b/src/repository.c
index f2cb985af..c24560665 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -53,7 +53,7 @@ typedef struct {
* Callbacks for the ODB cache, implemented
* as a hash table
*/
-uint32_t object_table_hash(const void *key, int hash_id)
+static uint32_t object_table_hash(const void *key, int hash_id)
{
uint32_t r;
git_oid *id;
diff --git a/src/revwalk.c b/src/revwalk.c
index a1cd0ebb7..edafbe73d 100644
--- a/src/revwalk.c
+++ b/src/revwalk.c
@@ -27,22 +27,132 @@
#include "commit.h"
#include "revwalk.h"
#include "hashtable.h"
+#include "pqueue.h"
-uint32_t git_revwalk__commit_hash(const void *key, int hash_id)
+typedef struct commit_object {
+ git_oid oid;
+ uint32_t time;
+ unsigned int seen:1,
+ uninteresting:1,
+ topo_delay:1,
+ parsed:1;
+
+ unsigned short in_degree;
+ unsigned short out_degree;
+
+ struct commit_object **parents;
+} commit_object;
+
+typedef struct commit_list {
+ commit_object *item;
+ struct commit_list *next;
+} commit_list;
+
+struct git_revwalk {
+ git_repository *repo;
+
+ git_hashtable *commits;
+ git_vector pending;
+
+ commit_list *iterator_topo;
+ commit_list *iterator_rand;
+ commit_list *iterator_reverse;
+ git_pqueue iterator_time;
+
+ int (*get_next)(commit_object **, git_revwalk *);
+ int (*enqueue)(git_revwalk *, commit_object *);
+
+ git_vector memory_alloc;
+ size_t chunk_size;
+
+ unsigned walking:1;
+ unsigned int sorting;
+};
+
+commit_list *commit_list_insert(commit_object *item, commit_list **list_p)
+{
+ commit_list *new_list = git__malloc(sizeof(commit_list));
+ new_list->item = item;
+ new_list->next = *list_p;
+ *list_p = new_list;
+ return new_list;
+}
+
+void commit_list_free(commit_list *list)
+{
+ while (list) {
+ commit_list *temp = list;
+ list = temp->next;
+ free(temp);
+ }
+}
+
+commit_object *commit_list_pop(commit_list **stack)
+{
+ commit_list *top = *stack;
+ commit_object *item = top ? top->item : NULL;
+
+ if (top) {
+ *stack = top->next;
+ free(top);
+ }
+ return item;
+}
+
+static int commit_time_cmp(void *a, void *b)
+{
+ commit_object *commit_a = (commit_object *)a;
+ commit_object *commit_b = (commit_object *)b;
+
+ return (commit_a->time < commit_b->time);
+}
+
+static uint32_t object_table_hash(const void *key, int hash_id)
{
uint32_t r;
- git_commit *commit;
+ git_oid *id;
- commit = (git_commit *)key;
- memcpy(&r, commit->object.id.id + (hash_id * sizeof(uint32_t)), sizeof(r));
+ id = (git_oid *)key;
+ memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r));
return r;
}
-int git_revwalk__commit_keycmp(const void *key_a, const void *key_b)
+#define COMMITS_PER_CHUNK 128
+#define CHUNK_STEP 64
+#define PARENTS_PER_COMMIT ((CHUNK_STEP - sizeof(commit_object)) / sizeof(commit_object *))
+
+static int alloc_chunk(git_revwalk *walk)
{
- git_commit *a = (git_commit *)key_a;
- git_commit *b = (git_commit *)key_b;
- return git_oid_cmp(&a->object.id, &b->object.id);
+ void *chunk;
+
+ chunk = git__calloc(COMMITS_PER_CHUNK, CHUNK_STEP);
+ if (chunk == NULL)
+ return GIT_ENOMEM;
+
+ walk->chunk_size = 0;
+ return git_vector_insert(&walk->memory_alloc, chunk);
+}
+
+static commit_object *alloc_commit(git_revwalk *walk)
+{
+ unsigned char *chunk;
+
+ if (walk->chunk_size == COMMITS_PER_CHUNK)
+ alloc_chunk(walk);
+
+ chunk = git_vector_get(&walk->memory_alloc, walk->memory_alloc.length - 1);
+ chunk += (walk->chunk_size * CHUNK_STEP);
+ walk->chunk_size++;
+
+ return (commit_object *)chunk;
+}
+
+static commit_object **alloc_parents(commit_object *commit, size_t n_parents)
+{
+ if (n_parents <= PARENTS_PER_COMMIT)
+ return (commit_object **)((unsigned char *)commit + sizeof(commit_object));
+
+ return git__malloc(n_parents * sizeof(commit_object *));
}
int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)
@@ -56,14 +166,18 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)
memset(walk, 0x0, sizeof(git_revwalk));
walk->commits = git_hashtable_alloc(64,
- git_revwalk__commit_hash,
- git_revwalk__commit_keycmp);
+ object_table_hash,
+ (git_hash_keyeq_ptr)git_oid_cmp);
if (walk->commits == NULL) {
free(walk);
return GIT_ENOMEM;
}
+ git_vector_init(&walk->pending, 8, NULL);
+ git_vector_init(&walk->memory_alloc, 8, NULL);
+ alloc_chunk(walk);
+
walk->repo = repo;
*revwalk_out = walk;
@@ -72,11 +186,20 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)
void git_revwalk_free(git_revwalk *walk)
{
+ unsigned int i;
+
if (walk == NULL)
return;
git_revwalk_reset(walk);
git_hashtable_free(walk->commits);
+ git_vector_free(&walk->pending);
+
+ for (i = 0; i < walk->memory_alloc.length; ++i) {
+ free(git_vector_get(&walk->memory_alloc, i));
+ }
+
+ git_vector_free(&walk->memory_alloc);
free(walk);
}
@@ -98,364 +221,349 @@ int git_revwalk_sorting(git_revwalk *walk, unsigned int sort_mode)
return GIT_SUCCESS;
}
-static git_revwalk_commit *commit_to_walkcommit(git_revwalk *walk, git_commit *commit_object)
+static commit_object *commit_lookup(git_revwalk *walk, const git_oid *oid)
{
- git_revwalk_commit *commit;
-
- commit = (git_revwalk_commit *)git_hashtable_lookup(walk->commits, commit_object);
+ commit_object *commit;
- if (commit != NULL)
+ if ((commit = git_hashtable_lookup(walk->commits, oid)) != NULL)
return commit;
- commit = git__malloc(sizeof(git_revwalk_commit));
+ commit = alloc_commit(walk);
if (commit == NULL)
return NULL;
- memset(commit, 0x0, sizeof(git_revwalk_commit));
-
- commit->commit_object = commit_object;
- GIT_OBJECT_INCREF(walk->repo, commit_object);
+ git_oid_cpy(&commit->oid, oid);
- git_hashtable_insert(walk->commits, commit_object, commit);
+ if (git_hashtable_insert(walk->commits, &commit->oid, commit) < GIT_SUCCESS) {
+ free(commit);
+ return NULL;
+ }
return commit;
}
-static git_revwalk_commit *insert_commit(git_revwalk *walk, git_commit *commit_object)
+static int commit_quick_parse(git_revwalk *walk, commit_object *commit, git_rawobj *raw)
{
- git_revwalk_commit *commit;
- unsigned int i;
+ const int parent_len = STRLEN("parent ") + GIT_OID_HEXSZ + 1;
- assert(walk && commit_object);
+ unsigned char *buffer = raw->data;
+ unsigned char *buffer_end = buffer + raw->len;
+ unsigned char *parents_start;
- if (commit_object->object.repo != walk->repo || walk->walking)
- return NULL;
+ int i, parents = 0;
- commit = commit_to_walkcommit(walk, commit_object);
- if (commit == NULL)
- return NULL;
+ buffer += STRLEN("tree ") + GIT_OID_HEXSZ + 1;
- if (commit->seen)
- return commit;
+ parents_start = buffer;
+ while (buffer + parent_len < buffer_end && memcmp(buffer, "parent ", STRLEN("parent ")) == 0) {
+ parents++;
+ buffer += parent_len;
+ }
- commit->seen = 1;
+ commit->parents = alloc_parents(commit, parents);
+ if (commit->parents == NULL)
+ return GIT_ENOMEM;
- for (i = 0; i < commit->commit_object->parents.length; ++i) {
- git_commit *parent_object;
- git_revwalk_commit *parent;
+ buffer = parents_start;
+ for (i = 0; i < parents; ++i) {
+ git_oid oid;
- parent_object = git_vector_get(&commit->commit_object->parents, i);
+ if (git_oid_mkstr(&oid, (char *)buffer + STRLEN("parent ")) < GIT_SUCCESS)
+ return GIT_EOBJCORRUPTED;
- if ((parent = commit_to_walkcommit(walk, parent_object)) == NULL)
- return NULL;
+ commit->parents[i] = commit_lookup(walk, &oid);
+ if (commit->parents[i] == NULL)
+ return GIT_ENOMEM;
- parent = insert_commit(walk, parent_object);
- if (parent == NULL)
- return NULL;
+ buffer += parent_len;
+ }
- parent->in_degree++;
+ commit->out_degree = (unsigned short)parents;
- git_revwalk_list_push_back(&commit->parents, parent);
- }
+ if ((buffer = memchr(buffer, '\n', buffer_end - buffer)) == NULL)
+ return GIT_EOBJCORRUPTED;
- if (git_revwalk_list_push_back(&walk->iterator, commit))
- return NULL;
+ buffer = memchr(buffer, '>', buffer_end - buffer);
+ if (buffer == NULL)
+ return GIT_EOBJCORRUPTED;
- return commit;
-}
+ commit->time = strtol((char *)buffer + 2, NULL, 10);
+ if (commit->time == 0)
+ return GIT_EOBJCORRUPTED;
-int git_revwalk_push(git_revwalk *walk, git_commit *commit)
-{
- assert(walk && commit);
- return insert_commit(walk, commit) ? GIT_SUCCESS : GIT_ENOMEM;
+ commit->parsed = 1;
+ return GIT_SUCCESS;
}
-static void mark_uninteresting(git_revwalk_commit *commit)
+static int commit_parse(git_revwalk *walk, commit_object *commit)
{
- git_revwalk_listnode *parent;
+ git_rawobj data;
+ int error;
- assert(commit);
+ if (commit->parsed)
+ return GIT_SUCCESS;
- commit->uninteresting = 1;
- parent = commit->parents.head;
+ if ((error = git_odb_read(&data, walk->repo->db, &commit->oid)) < GIT_SUCCESS)
+ return error;
- while (parent) {
- mark_uninteresting(parent->walk_commit);
- parent = parent->next;
+ if (data.type != GIT_OBJ_COMMIT) {
+ git_rawobj_close(&data);
+ return GIT_EOBJTYPE;
}
+
+ error = commit_quick_parse(walk, commit, &data);
+ git_rawobj_close(&data);
+ return error;
}
-int git_revwalk_hide(git_revwalk *walk, git_commit *commit)
+static void mark_uninteresting(commit_object *commit)
{
- git_revwalk_commit *hide;
+ unsigned short i;
+ assert(commit);
- assert(walk && commit);
-
- hide = insert_commit(walk, commit);
- if (hide == NULL)
- return GIT_ENOMEM;
+ commit->uninteresting = 1;
- mark_uninteresting(hide);
- return GIT_SUCCESS;
+ for (i = 0; i < commit->out_degree; ++i)
+ if (!commit->parents[i]->uninteresting)
+ mark_uninteresting(commit->parents[i]);
}
-
-static void prepare_walk(git_revwalk *walk)
+static int process_commit(git_revwalk *walk, commit_object *commit)
{
- if (walk->sorting & GIT_SORT_TIME)
- git_revwalk_list_timesort(&walk->iterator);
-
- if (walk->sorting & GIT_SORT_TOPOLOGICAL)
- git_revwalk_list_toposort(&walk->iterator);
+ int error;
- if (walk->sorting & GIT_SORT_REVERSE)
- walk->next = &git_revwalk_list_pop_back;
- else
- walk->next = &git_revwalk_list_pop_front;
+ if (commit->seen)
+ return GIT_SUCCESS;
- walk->walking = 1;
-}
+ commit->seen = 1;
-int git_revwalk_next(git_commit **commit, git_revwalk *walk)
-{
- git_revwalk_commit *next;
+ if ((error = commit_parse(walk, commit)) < GIT_SUCCESS)
+ return error;
- assert(walk && commit);
+ if (commit->uninteresting)
+ mark_uninteresting(commit);
- if (!walk->walking)
- prepare_walk(walk);
+ return walk->enqueue(walk, commit);
+}
- *commit = NULL;
+static int process_commit_parents(git_revwalk *walk, commit_object *commit)
+{
+ unsigned short i;
+ int error = GIT_SUCCESS;
- while ((next = walk->next(&walk->iterator)) != NULL) {
- if (!next->uninteresting) {
- *commit = next->commit_object;
- GIT_OBJECT_INCREF(walk->repo, *commit);
- return GIT_SUCCESS;
- }
+ for (i = 0; i < commit->out_degree && error == GIT_SUCCESS; ++i) {
+ error = process_commit(walk, commit->parents[i]);
}
- /* No commits left to iterate */
- git_revwalk_reset(walk);
- return GIT_EREVWALKOVER;
+ return error;
}
-void git_revwalk_reset(git_revwalk *walk)
+static int push_commit(git_revwalk *walk, const git_oid *oid, int uninteresting)
{
- const void *_unused;
- git_revwalk_commit *commit;
+ commit_object *commit;
- assert(walk);
+ commit = commit_lookup(walk, oid);
+ if (commit == NULL)
+ return GIT_ENOTFOUND;
- GIT_HASHTABLE_FOREACH(walk->commits, _unused, commit, {
- GIT_OBJECT_DECREF(walk->repo, commit->commit_object);
- git_revwalk_list_clear(&commit->parents);
- free(commit);
- });
+ if (uninteresting)
+ mark_uninteresting(commit);
- git_hashtable_clear(walk->commits);
- git_revwalk_list_clear(&walk->iterator);
- walk->walking = 0;
+ return git_vector_insert(&walk->pending, commit);
}
+int git_revwalk_push(git_revwalk *walk, const git_oid *oid)
+{
+ assert(walk && oid);
+ return push_commit(walk, oid, 0);
+}
+int git_revwalk_hide(git_revwalk *walk, const git_oid *oid)
+{
+ assert(walk && oid);
+ return push_commit(walk, oid, 1);
+}
-
-
-
-int git_revwalk_list_push_back(git_revwalk_list *list, git_revwalk_commit *commit)
+static int revwalk_enqueue_timesort(git_revwalk *walk, commit_object *commit)
{
- git_revwalk_listnode *node = NULL;
+ return git_pqueue_insert(&walk->iterator_time, commit);
+}
- node = git__malloc(sizeof(git_revwalk_listnode));
+static int revwalk_enqueue_unsorted(git_revwalk *walk, commit_object *commit)
+{
+ return commit_list_insert(commit, &walk->iterator_rand) ? GIT_SUCCESS : GIT_ENOMEM;
+}
- if (node == NULL)
- return GIT_ENOMEM;
+static int revwalk_next_timesort(commit_object **object_out, git_revwalk *walk)
+{
+ int error;
+ commit_object *next;
- node->walk_commit = commit;
- node->next = NULL;
- node->prev = list->tail;
+ while ((next = git_pqueue_pop(&walk->iterator_time)) != NULL) {
+ if ((error = process_commit_parents(walk, next)) < GIT_SUCCESS)
+ return error;
- if (list->tail == NULL) {
- list->head = list->tail = node;
- } else {
- list->tail->next = node;
- list->tail = node;
+ if (!next->uninteresting) {
+ *object_out = next;
+ return GIT_SUCCESS;
+ }
}
- list->size++;
- return 0;
+ return GIT_EREVWALKOVER;
}
-int git_revwalk_list_push_front(git_revwalk_list *list, git_revwalk_commit *commit)
+static int revwalk_next_unsorted(commit_object **object_out, git_revwalk *walk)
{
- git_revwalk_listnode *node = NULL;
-
- node = git__malloc(sizeof(git_revwalk_listnode));
+ int error;
+ commit_object *next;
- if (node == NULL)
- return GIT_ENOMEM;
+ while ((next = commit_list_pop(&walk->iterator_rand)) != NULL) {
+ if ((error = process_commit_parents(walk, next)) < GIT_SUCCESS)
+ return error;
- node->walk_commit = commit;
- node->next = list->head;
- node->prev = NULL;
-
- if (list->head == NULL) {
- list->head = list->tail = node;
- } else {
- list->head->prev = node;
- list->head = node;
+ if (!next->uninteresting) {
+ *object_out = next;
+ return GIT_SUCCESS;
+ }
}
- list->size++;
- return 0;
+ return GIT_EREVWALKOVER;
}
-
-git_revwalk_commit *git_revwalk_list_pop_back(git_revwalk_list *list)
+static int revwalk_next_toposort(commit_object **object_out, git_revwalk *walk)
{
- git_revwalk_listnode *node;
- git_revwalk_commit *commit;
+ commit_object *next;
+ unsigned short i;
- if (list->tail == NULL)
- return NULL;
+ for (;;) {
+ next = commit_list_pop(&walk->iterator_topo);
+ if (next == NULL)
+ return GIT_EREVWALKOVER;
- node = list->tail;
- list->tail = list->tail->prev;
- if (list->tail == NULL)
- list->head = NULL;
- else
- list->tail->next = NULL;
+ if (next->in_degree > 0) {
+ next->topo_delay = 1;
+ continue;
+ }
- commit = node->walk_commit;
- free(node);
+ for (i = 0; i < next->out_degree; ++i) {
+ commit_object *parent = next->parents[i];
- list->size--;
+ if (--parent->in_degree == 0 && parent->topo_delay) {
+ parent->topo_delay = 0;
+ commit_list_insert(parent, &walk->iterator_topo);
+ }
+ }
- return commit;
+ *object_out = next;
+ return GIT_SUCCESS;
+ }
}
-git_revwalk_commit *git_revwalk_list_pop_front(git_revwalk_list *list)
+static int revwalk_next_reverse(commit_object **object_out, git_revwalk *walk)
{
- git_revwalk_listnode *node;
- git_revwalk_commit *commit;
-
- if (list->head == NULL)
- return NULL;
-
- node = list->head;
- list->head = list->head->next;
- if (list->head == NULL)
- list->tail = NULL;
- else
- list->head->prev = NULL;
+ *object_out = commit_list_pop(&walk->iterator_reverse);
+ return *object_out ? GIT_SUCCESS : GIT_EREVWALKOVER;
+}
- commit = node->walk_commit;
- free(node);
- list->size--;
+static int prepare_walk(git_revwalk *walk)
+{
+ unsigned int i;
+ int error;
- return commit;
-}
+ if (walk->sorting & GIT_SORT_TIME) {
+ if ((error = git_pqueue_init(&walk->iterator_time, 32, commit_time_cmp)) < GIT_SUCCESS)
+ return error;
-void git_revwalk_list_clear(git_revwalk_list *list)
-{
- git_revwalk_listnode *node, *next_node;
+ walk->get_next = &revwalk_next_timesort;
+ walk->enqueue = &revwalk_enqueue_timesort;
+ } else {
+ walk->get_next = &revwalk_next_unsorted;
+ walk->enqueue = &revwalk_enqueue_unsorted;
+ }
- node = list->head;
- while (node) {
- next_node = node->next;
- free(node);
- node = next_node;
+ for (i = 0; i < walk->pending.length; ++i) {
+ commit_object *commit = walk->pending.contents[i];
+ if ((error = process_commit(walk, commit)) < GIT_SUCCESS) {
+ return error;
+ }
}
- list->head = list->tail = NULL;
- list->size = 0;
-}
+ if (walk->sorting & GIT_SORT_TOPOLOGICAL) {
+ commit_object *next;
+ unsigned short i;
+ int error;
-void git_revwalk_list_timesort(git_revwalk_list *list)
-{
- git_revwalk_listnode *p, *q, *e;
- int in_size, p_size, q_size, merge_count, i;
+ while ((error = walk->get_next(&next, walk)) == GIT_SUCCESS) {
+ for (i = 0; i < next->out_degree; ++i) {
+ commit_object *parent = next->parents[i];
+ parent->in_degree++;
+ }
- if (list->head == NULL)
- return;
+ commit_list_insert(next, &walk->iterator_topo);
+ }
- in_size = 1;
+ if (error != GIT_EREVWALKOVER)
+ return error;
- do {
- p = list->head;
- list->tail = NULL;
- merge_count = 0;
+ walk->get_next = &revwalk_next_toposort;
+ }
- while (p != NULL) {
- merge_count++;
- q = p;
- p_size = 0;
- q_size = in_size;
+ if (walk->sorting & GIT_SORT_REVERSE) {
+ commit_object *next;
+ int error;
- for (i = 0; i < in_size && q; ++i, q = q->next)
- p_size++;
+ while ((error = walk->get_next(&next, walk)) == GIT_SUCCESS)
+ commit_list_insert(next, &walk->iterator_reverse);
- while (p_size > 0 || (q_size > 0 && q)) {
+ if (error != GIT_EREVWALKOVER)
+ return error;
- if (p_size == 0)
- e = q, q = q->next, q_size--;
+ walk->get_next = &revwalk_next_reverse;
+ }
- else if (q_size == 0 || q == NULL ||
- p->walk_commit->commit_object->committer->when.time >=
- q->walk_commit->commit_object->committer->when.time)
- e = p, p = p->next, p_size--;
+ walk->walking = 1;
+ return GIT_SUCCESS;
+}
- else
- e = q, q = q->next, q_size--;
- if (list->tail != NULL)
- list->tail->next = e;
- else
- list->head = e;
+int git_revwalk_next(git_oid *oid, git_revwalk *walk)
+{
+ int error;
+ commit_object *next;
- e->prev = list->tail;
- list->tail = e;
- }
+ assert(walk && oid);
- p = q;
- }
+ if (!walk->walking) {
+ if ((error = prepare_walk(walk)) < GIT_SUCCESS)
+ return error;
+ }
- list->tail->next = NULL;
- in_size *= 2;
+ error = walk->get_next(&next, walk);
+ if (error < GIT_SUCCESS)
+ return error;
- } while (merge_count > 1);
+ git_oid_cpy(oid, &next->oid);
+ return GIT_SUCCESS;
}
-void git_revwalk_list_toposort(git_revwalk_list *list)
+void git_revwalk_reset(git_revwalk *walk)
{
- git_revwalk_commit *commit;
- git_revwalk_list topo;
- memset(&topo, 0x0, sizeof(git_revwalk_list));
-
- while ((commit = git_revwalk_list_pop_back(list)) != NULL) {
- git_revwalk_listnode *p;
-
- if (commit->in_degree > 0) {
- commit->topo_delay = 1;
- continue;
- }
-
- for (p = commit->parents.head; p != NULL; p = p->next) {
- p->walk_commit->in_degree--;
+ const void *_unused;
+ commit_object *commit;
- if (p->walk_commit->in_degree == 0 && p->walk_commit->topo_delay) {
- p->walk_commit->topo_delay = 0;
- git_revwalk_list_push_back(list, p->walk_commit);
- }
- }
+ assert(walk);
- git_revwalk_list_push_back(&topo, commit);
- }
+ GIT_HASHTABLE_FOREACH(walk->commits, _unused, commit,
+ commit->seen = 0;
+ commit->in_degree = 0;
+ commit->topo_delay = 0;
+ );
- list->head = topo.head;
- list->tail = topo.tail;
- list->size = topo.size;
+ git_pqueue_free(&walk->iterator_time);
+ commit_list_free(walk->iterator_topo);
+ commit_list_free(walk->iterator_rand);
+ commit_list_free(walk->iterator_reverse);
+ walk->walking = 0;
}
diff --git a/src/revwalk.h b/src/revwalk.h
index 7b69ccd63..2970d773c 100644
--- a/src/revwalk.h
+++ b/src/revwalk.h
@@ -8,60 +8,4 @@
#include "repository.h"
#include "hashtable.h"
-struct git_revwalk_commit;
-
-typedef struct git_revwalk_listnode {
- struct git_revwalk_commit *walk_commit;
- struct git_revwalk_listnode *next;
- struct git_revwalk_listnode *prev;
-} git_revwalk_listnode;
-
-typedef struct git_revwalk_list {
- struct git_revwalk_listnode *head;
- struct git_revwalk_listnode *tail;
- size_t size;
-} git_revwalk_list;
-
-
-struct git_revwalk_commit {
-
- git_commit *commit_object;
- git_revwalk_list parents;
-
- unsigned short in_degree;
- unsigned seen:1,
- uninteresting:1,
- topo_delay:1,
- flags:25;
-};
-
-typedef struct git_revwalk_commit git_revwalk_commit;
-
-struct git_revwalk {
- git_repository *repo;
-
- git_hashtable *commits;
- git_revwalk_list iterator;
-
- git_revwalk_commit *(*next)(git_revwalk_list *);
-
- unsigned walking:1;
- unsigned int sorting;
-};
-
-
-void git_revwalk__prepare_walk(git_revwalk *walk);
-int git_revwalk__enroot(git_revwalk *walk, git_commit *commit);
-
-int git_revwalk_list_push_back(git_revwalk_list *list, git_revwalk_commit *commit);
-int git_revwalk_list_push_front(git_revwalk_list *list, git_revwalk_commit *obj);
-
-git_revwalk_commit *git_revwalk_list_pop_back(git_revwalk_list *list);
-git_revwalk_commit *git_revwalk_list_pop_front(git_revwalk_list *list);
-
-void git_revwalk_list_clear(git_revwalk_list *list);
-
-void git_revwalk_list_timesort(git_revwalk_list *list);
-void git_revwalk_list_toposort(git_revwalk_list *list);
-
#endif /* INCLUDE_revwalk_h__ */
diff --git a/src/util.h b/src/util.h
index d5320e15b..fab93c1c2 100644
--- a/src/util.h
+++ b/src/util.h
@@ -93,6 +93,8 @@ GIT_INLINE(int) git__is_sizet(git_off_t p)
extern char *git__strtok(char *output, char *src, char *delimit);
extern char *git__strtok_keep(char *output, char *src, char *delimit);
+#define STRLEN(str) (sizeof(str) - 1)
+
/*
* Realloc the buffer pointed at by variable 'x' so that it can hold
* at least 'nr' entries; the number of entries currently allocated
diff --git a/tests/t05-revwalk.c b/tests/t05-revwalk.c
index fd009fac1..bdec09e83 100644
--- a/tests/t05-revwalk.c
+++ b/tests/t05-revwalk.c
@@ -70,12 +70,12 @@ static const int commit_sorting_time_reverse[][6] = {
static const int result_bytes = 24;
-static int get_commit_index(git_commit *commit)
+static int get_commit_index(git_oid *raw_oid)
{
int i;
char oid[40];
- git_oid_fmt(oid, &commit->object.id);
+ git_oid_fmt(oid, raw_oid);
for (i = 0; i < commit_count; ++i)
if (memcmp(oid, commit_ids[i], 40) == 0)
@@ -84,23 +84,31 @@ static int get_commit_index(git_commit *commit)
return -1;
}
-static int test_walk(git_revwalk *walk, git_commit *start_from,
+static int test_walk(git_revwalk *walk,
int flags, const int possible_results[][6], int results_count)
{
- git_commit *commit = NULL;
+ git_oid oid;
int i;
int result_array[commit_count];
+ git_revwalk_reset(walk);
git_revwalk_sorting(walk, flags);
- git_revwalk_push(walk, start_from);
for (i = 0; i < commit_count; ++i)
result_array[i] = -1;
i = 0;
- while (git_revwalk_next(&commit, walk) == GIT_SUCCESS)
- result_array[i++] = get_commit_index(commit);
+
+ while (git_revwalk_next(&oid, walk) == GIT_SUCCESS) {
+ result_array[i++] = get_commit_index(&oid);
+ /*{
+ char str[41];
+ git_oid_fmt(str, &oid);
+ str[40] = 0;
+ printf(" %d) %s\n", i, str);
+ }*/
+ }
for (i = 0; i < results_count; ++i)
if (memcmp(possible_results[i],
@@ -114,103 +122,26 @@ BEGIN_TEST(walk0, "do a simple walk on a repo with different sorting modes")
git_oid id;
git_repository *repo;
git_revwalk *walk;
- git_commit *head = NULL;
must_pass(git_repository_open(&repo, REPOSITORY_FOLDER));
must_pass(git_revwalk_new(&walk, repo));
git_oid_mkstr(&id, commit_head);
+ git_revwalk_push(walk, &id);
- must_pass(git_commit_lookup(&head, repo, &id));
-
- must_pass(test_walk(walk, head,
- GIT_SORT_TIME,
- commit_sorting_time, 1));
+ must_pass(test_walk(walk, GIT_SORT_TIME, commit_sorting_time, 1));
- must_pass(test_walk(walk, head,
- GIT_SORT_TOPOLOGICAL,
- commit_sorting_topo, 2));
+ must_pass(test_walk(walk, GIT_SORT_TOPOLOGICAL, commit_sorting_topo, 2));
- must_pass(test_walk(walk, head,
- GIT_SORT_TIME | GIT_SORT_REVERSE,
- commit_sorting_time_reverse, 1));
-
- must_pass(test_walk(walk, head,
- GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE,
- commit_sorting_topo_reverse, 2));
+ must_pass(test_walk(walk, GIT_SORT_TIME | GIT_SORT_REVERSE, commit_sorting_time_reverse, 1));
+ must_pass(test_walk(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE, commit_sorting_topo_reverse, 2));
git_revwalk_free(walk);
git_repository_free(repo);
END_TEST
-BEGIN_TEST(list0, "check that a commit list is properly sorted by time")
-
- git_revwalk_list list;
- git_revwalk_listnode *n;
- int i, t;
- time_t previous_time;
-
-#define TEST_SORTED() \
- previous_time = INT_MAX;\
- for (n = list.head; n != NULL; n = n->next) {\
- must_be_true(n->walk_commit->commit_object->committer->when.time <= previous_time);\
- previous_time = n->walk_commit->commit_object->committer->when.time;\
- }
-
-#define CLEAR_LIST() \
- for (n = list.head; n != NULL; n = n->next) {\
- git_signature_free(n->walk_commit->commit_object->committer);\
- free(n->walk_commit->commit_object);\
- free(n->walk_commit);\
- }\
- git_revwalk_list_clear(&list);
-
- memset(&list, 0x0, sizeof(git_revwalk_list));
- srand((unsigned int)time(NULL));
-
- for (t = 0; t < 20; ++t) {
- const int test_size = rand() % 500 + 500;
-
- /* Purely random sorting test */
- for (i = 0; i < test_size; ++i) {
- git_commit *c = git__malloc(sizeof(git_commit));
- git_revwalk_commit *rc = git__malloc(sizeof(git_revwalk_commit));
-
- c->committer = git_signature_new("", "", (time_t)rand(), 0);
- rc->commit_object = c;
-
- git_revwalk_list_push_back(&list, rc);
- }
-
- git_revwalk_list_timesort(&list);
- TEST_SORTED();
- CLEAR_LIST();
- }
-
- /* Try to sort list with all dates equal. */
- for (i = 0; i < 200; ++i) {
- git_commit *c = git__malloc(sizeof(git_commit));
- git_revwalk_commit *rc = git__malloc(sizeof(git_revwalk_commit));
-
- c->committer = git_signature_new("", "", 0, 0);
- rc->commit_object = c;
-
- git_revwalk_list_push_back(&list, rc);
- }
-
- git_revwalk_list_timesort(&list);
- TEST_SORTED();
- CLEAR_LIST();
-
- /* Try to sort empty list */
- git_revwalk_list_timesort(&list);
- TEST_SORTED();
-
-END_TEST
-
BEGIN_SUITE(revwalk)
ADD_TEST(walk0);
- ADD_TEST(list0);
END_SUITE