diff options
author | Junio C Hamano <junkio@cox.net> | 2006-02-27 23:38:50 -0800 |
---|---|---|
committer | Junio C Hamano <junkio@cox.net> | 2006-03-01 01:57:45 -0800 |
commit | c436eb8cf1efa3fe2c70100ae0cbc48f0feaf5af (patch) | |
tree | 0dd15bed22ad6d92659878c6fc73f2e270250d74 | |
parent | d00e0f8101e18e3d48a60d574dacc83dad8e19bb (diff) | |
download | git-c436eb8cf1efa3fe2c70100ae0cbc48f0feaf5af.tar.gz |
diff-delta: cull collided hash bucket more aggressively.
This tries to limit collided hash buckets by removing identical
three-byte prefix from the same hashbucket.
-rw-r--r-- | diff-delta.c | 37 |
1 files changed, 25 insertions, 12 deletions
diff --git a/diff-delta.c b/diff-delta.c index 0730b24df8..b7190ea47a 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -88,22 +88,35 @@ static struct index ** delta_index(const unsigned char *buf, /* * Now make sure none of the hash buckets has more entries than - * we're willing to test. Otherwise we short-circuit the entry - * list uniformly to still preserve a good repartition across - * the reference buffer. + * we're willing to test. Otherwise we cull the entry list to + * limit identical three byte prefixes to still preserve a good + * repartition across the reference buffer. */ for (i = 0; i < hsize; i++) { + struct index **list, *bucket, *remaining; + int cnt; if (hash_count[i] < hlimit) continue; - entry = hash[i]; - do { - struct index *keep = entry; - int skip = hash_count[i] / hlimit / 2; - do { - entry = entry->next; - } while(--skip && entry); - keep->next = entry; - } while(entry); + + bucket = NULL; + list = &bucket; + remaining = hash[i]; + cnt = 0; + while (cnt < hlimit && remaining) { + struct index *this = remaining, *that; + remaining = remaining->next; + for (that = bucket; that; that = that->next) { + if (!memcmp(that->ptr, this->ptr, 3)) + break; + } + if (that) + continue; /* discard */ + cnt++; + *list = this; + list = &(this->next); + this->next = NULL; + } + hash[i] = bucket; } free(hash_count); |