summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRussell Belfer <rb@github.com>2012-04-23 09:23:58 -0700
committerRussell Belfer <rb@github.com>2012-04-25 11:14:34 -0700
commitc16c8b9a7e7588f4ced41aa8f9787495f41fd918 (patch)
tree903c4339029c261c5a680f06b9ee26100c416c57
parent25f258e735f707075dc1b5cdd804540fe1e43f37 (diff)
downloadlibgit2-c16c8b9a7e7588f4ced41aa8f9787495f41fd918.tar.gz
Adding stash to hashtable implementation
Adding a small stash of nodes with key conflicts has been demonstrated to greatly increase the efficiency of a cuckoo hashtable. See: http://research.microsoft.com/pubs/73856/stash-full.9-30.pdf for more details.
-rw-r--r--src/hashtable.c143
-rw-r--r--src/hashtable.h13
2 files changed, 112 insertions, 44 deletions
diff --git a/src/hashtable.c b/src/hashtable.c
index 8e057d4b1..e2f131cf1 100644
--- a/src/hashtable.c
+++ b/src/hashtable.c
@@ -18,28 +18,37 @@ static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key,
static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other);
static int node_insert(git_hashtable *self, git_hashtable_node *new_node);
static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size);
+static void reinsert_stash(git_hashtable *self);
static int resize_to(git_hashtable *self, size_t new_size)
{
git_hashtable_node *old_nodes = self->nodes;
size_t old_size = self->size;
+ git_hashtable_node old_stash[GIT_HASHTABLE_STASH_SIZE];
+ size_t old_stash_count = self->stash_count;
self->is_resizing = 1;
+ if (old_stash_count > 0)
+ memcpy(old_stash, self->stash,
+ old_stash_count * sizeof(git_hashtable_node));
+
do {
self->size = new_size;
self->size_mask = new_size - 1;
self->key_count = 0;
+ self->stash_count = 0;
self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size);
GITERR_CHECK_ALLOC(self->nodes);
- if (insert_nodes(self, old_nodes, old_size) == 0)
+ if (insert_nodes(self, old_nodes, old_size) == 0 &&
+ insert_nodes(self, old_stash, old_stash_count) == 0)
self->is_resizing = 0;
else {
new_size *= 2;
git__free(self->nodes);
}
- } while(self->is_resizing);
+ } while (self->is_resizing);
git__free(old_nodes);
return 0;
@@ -47,26 +56,28 @@ static int resize_to(git_hashtable *self, size_t new_size)
static int set_size(git_hashtable *self, size_t new_size)
{
- self->nodes = git__realloc(self->nodes, new_size * sizeof(git_hashtable_node));
+ self->nodes =
+ git__realloc(self->nodes, new_size * sizeof(git_hashtable_node));
GITERR_CHECK_ALLOC(self->nodes);
- if (new_size > self->size) {
+ if (new_size > self->size)
memset(&self->nodes[self->size], 0x0,
(new_size - self->size) * sizeof(git_hashtable_node));
- }
self->size = new_size;
self->size_mask = new_size - 1;
return 0;
}
-static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id)
+GIT_INLINE(git_hashtable_node *)node_with_hash(
+ git_hashtable *self, const void *key, int hash_id)
{
size_t pos = self->hash(key, hash_id) & self->size_mask;
return git_hashtable_node_at(self->nodes, pos);
}
-static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other)
+GIT_INLINE(void) node_swap_with(
+ git_hashtable_node *self, git_hashtable_node *other)
{
git_hashtable_node tmp = *self;
*self = *other;
@@ -76,19 +87,26 @@ static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other)
static int node_insert(git_hashtable *self, git_hashtable_node *new_node)
{
int iteration, hash_id;
+ git_hashtable_node *node;
for (iteration = 0; iteration < MAX_LOOPS; iteration++) {
for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
- git_hashtable_node *node;
node = node_with_hash(self, new_node->key, hash_id);
node_swap_with(new_node, node);
- if (new_node->key == 0x0){
+ if (new_node->key == 0x0) {
self->key_count++;
return 0;
}
}
}
+ /* Insert into stash if there is space */
+ if (self->stash_count < GIT_HASHTABLE_STASH_SIZE) {
+ node_swap_with(new_node, &self->stash[self->stash_count++]);
+ self->key_count++;
+ return 0;
+ }
+
/* Failed to insert node. Hashtable is currently resizing */
assert(!self->is_resizing);
@@ -105,14 +123,29 @@ static int insert_nodes(
for (i = 0; i < old_size; ++i) {
git_hashtable_node *node = git_hashtable_node_at(old_nodes, i);
- if (node->key &&
- git_hashtable_insert(self, node->key, node->value) < 0)
+ if (node->key && node_insert(self, node) < 0)
return -1;
}
return 0;
}
+static void reinsert_stash(git_hashtable *self)
+{
+ int stash_count;
+ struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE];
+
+ if (self->stash_count <= 0)
+ return;
+
+ memcpy(stash, self->stash, self->stash_count * sizeof(git_hashtable_node));
+ stash_count = self->stash_count;
+ self->stash_count = 0;
+
+ /* the node_insert() calls *cannot* fail because the stash is empty */
+ insert_nodes(self, stash, stash_count);
+}
+
git_hashtable *git_hashtable_alloc(
size_t min_size,
git_hash_ptr hash,
@@ -127,21 +160,11 @@ git_hashtable *git_hashtable_alloc(
memset(table, 0x0, sizeof(git_hashtable));
- if (min_size < 8)
- min_size = 8;
-
- /* round up size to closest power of 2 */
- min_size--;
- min_size |= min_size >> 1;
- min_size |= min_size >> 2;
- min_size |= min_size >> 4;
- min_size |= min_size >> 8;
- min_size |= min_size >> 16;
-
table->hash = hash;
table->key_equal = key_eq;
- set_size(table, min_size + 1);
+ min_size = git__size_t_powerof2(min_size < 8 ? 8 : min_size);
+ set_size(table, min_size);
return table;
}
@@ -151,6 +174,8 @@ void git_hashtable_clear(git_hashtable *self)
assert(self);
memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size);
+
+ self->stash_count = 0;
self->key_count = 0;
}
@@ -200,39 +225,70 @@ int git_hashtable_insert2(
}
}
-void *git_hashtable_lookup(git_hashtable *self, const void *key)
+static git_hashtable_node *find_node(git_hashtable *self, const void *key)
{
- int hash_id;
+ int hash_id, count = 0;
git_hashtable_node *node;
- assert(self && self->nodes);
-
for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
node = node_with_hash(self, key, hash_id);
- if (node->key && self->key_equal(key, node->key) == 0)
- return node->value;
+ if (node->key) {
+ ++count;
+ if (self->key_equal(key, node->key) == 0)
+ return node;
+ }
+ }
+
+ /* check stash if not found but all slots were filled */
+ if (count == GIT_HASHTABLE_HASHES) {
+ for (count = 0; count < self->stash_count; ++count)
+ if (self->key_equal(key, self->stash[count].key) == 0)
+ return &self->stash[count];
}
return NULL;
}
+static void reset_stash(git_hashtable *self, git_hashtable_node *node)
+{
+ /* if node was in stash, then compact stash */
+ ssize_t offset = node - self->stash;
+
+ if (offset >= 0 && offset < self->stash_count) {
+ if (offset < self->stash_count - 1)
+ memmove(node, node + 1, (self->stash_count - offset) *
+ sizeof(git_hashtable_node));
+ self->stash_count--;
+ }
+
+ reinsert_stash(self);
+}
+
+void *git_hashtable_lookup(git_hashtable *self, const void *key)
+{
+ git_hashtable_node *node;
+ assert(self && key);
+ node = find_node(self, key);
+ return node ? node->value : NULL;
+}
+
int git_hashtable_remove2(
git_hashtable *self, const void *key, void **old_value)
{
- int hash_id;
git_hashtable_node *node;
assert(self && self->nodes);
- for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
- node = node_with_hash(self, key, hash_id);
- if (node->key && self->key_equal(key, node->key) == 0) {
- *old_value = node->value;
- node->key = NULL;
- node->value = NULL;
- self->key_count--;
- return 0;
- }
+ node = find_node(self, key);
+ if (node) {
+ *old_value = node->value;
+
+ node->key = NULL;
+ node->value = NULL;
+ self->key_count--;
+
+ reset_stash(self, node);
+ return 0;
}
return GIT_ENOTFOUND;
@@ -240,10 +296,15 @@ int git_hashtable_remove2(
int git_hashtable_merge(git_hashtable *self, git_hashtable *other)
{
- if (resize_to(self, (self->size + other->size) * 2) < 0)
+ size_t new_size = git__size_t_powerof2(self->size + other->size);
+
+ if (resize_to(self, new_size) < 0)
+ return -1;
+
+ if (insert_nodes(self, other->nodes, other->key_count) < 0)
return -1;
- return insert_nodes(self, other->nodes, other->key_count);
+ return insert_nodes(self, other->stash, other->stash_count);
}
diff --git a/src/hashtable.h b/src/hashtable.h
index 0bab84543..448487507 100644
--- a/src/hashtable.h
+++ b/src/hashtable.h
@@ -22,6 +22,8 @@ struct git_hashtable_node {
void *value;
};
+#define GIT_HASHTABLE_STASH_SIZE 3
+
struct git_hashtable {
struct git_hashtable_node *nodes;
@@ -29,6 +31,9 @@ struct git_hashtable {
size_t size;
size_t key_count;
+ struct git_hashtable_node stash[GIT_HASHTABLE_STASH_SIZE];
+ int stash_count;
+
int is_resizing;
git_hash_ptr hash;
@@ -38,9 +43,11 @@ struct git_hashtable {
typedef struct git_hashtable_node git_hashtable_node;
typedef struct git_hashtable git_hashtable;
-git_hashtable *git_hashtable_alloc(size_t min_size,
- git_hash_ptr hash,
- git_hash_keyeq_ptr key_eq);
+git_hashtable *git_hashtable_alloc(
+ size_t min_size,
+ git_hash_ptr hash,
+ git_hash_keyeq_ptr key_eq);
+
void *git_hashtable_lookup(git_hashtable *h, const void *key);
int git_hashtable_remove2(git_hashtable *table, const void *key, void **old_value);