summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <junkio@cox.net>2006-02-27 23:38:50 -0800
committerJunio C Hamano <junkio@cox.net>2006-03-01 01:57:45 -0800
commitc436eb8cf1efa3fe2c70100ae0cbc48f0feaf5af (patch)
tree0dd15bed22ad6d92659878c6fc73f2e270250d74
parentd00e0f8101e18e3d48a60d574dacc83dad8e19bb (diff)
downloadgit-c436eb8cf1efa3fe2c70100ae0cbc48f0feaf5af.tar.gz
diff-delta: cull collided hash bucket more aggressively.
This tries to limit collided hash buckets by removing identical three-byte prefix from the same hashbucket.
-rw-r--r--diff-delta.c37
1 files changed, 25 insertions, 12 deletions
diff --git a/diff-delta.c b/diff-delta.c
index 0730b24df8..b7190ea47a 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -88,22 +88,35 @@ static struct index ** delta_index(const unsigned char *buf,
/*
* Now make sure none of the hash buckets has more entries than
- * we're willing to test. Otherwise we short-circuit the entry
- * list uniformly to still preserve a good repartition across
- * the reference buffer.
+ * we're willing to test. Otherwise we cull the entry list to
+ * limit identical three byte prefixes to still preserve a good
+ * repartition across the reference buffer.
*/
for (i = 0; i < hsize; i++) {
+ struct index **list, *bucket, *remaining;
+ int cnt;
if (hash_count[i] < hlimit)
continue;
- entry = hash[i];
- do {
- struct index *keep = entry;
- int skip = hash_count[i] / hlimit / 2;
- do {
- entry = entry->next;
- } while(--skip && entry);
- keep->next = entry;
- } while(entry);
+
+ bucket = NULL;
+ list = &bucket;
+ remaining = hash[i];
+ cnt = 0;
+ while (cnt < hlimit && remaining) {
+ struct index *this = remaining, *that;
+ remaining = remaining->next;
+ for (that = bucket; that; that = that->next) {
+ if (!memcmp(that->ptr, this->ptr, 3))
+ break;
+ }
+ if (that)
+ continue; /* discard */
+ cnt++;
+ *list = this;
+ list = &(this->next);
+ this->next = NULL;
+ }
+ hash[i] = bucket;
}
free(hash_count);