diff options
| author | David Kastrup <dak@gnu.org> | 2007-09-08 23:25:55 +0200 | 
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2007-09-09 17:16:49 -0700 | 
| commit | 02e665ce491296245f474dafdc02d47a6c8afa86 (patch) | |
| tree | a47e82077da6f80abc52ccc898e5f03f883c3417 /diff-delta.c | |
| parent | d2100860fd67dec6474157697888caaa0a0f51d0 (diff) | |
| download | git-02e665ce491296245f474dafdc02d47a6c8afa86.tar.gz | |
diff-delta.c: Rationalize culling of hash buckets
The previous hash bucket culling resulted in a somewhat unpredictable
number of hash bucket entries in the order of magnitude of HASH_LIMIT.
Replace this with a Bresenham-like algorithm leaving us with exactly
HASH_LIMIT entries by uniform culling.
Signed-off-by: David Kastrup <dak@gnu.org>
Acked-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'diff-delta.c')
| -rw-r--r-- | diff-delta.c | 41 | 
1 files changed, 31 insertions, 10 deletions
| diff --git a/diff-delta.c b/diff-delta.c index 7f8a60de18..9e440a9299 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -207,19 +207,40 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)  	 * the reference buffer.  	 */  	for (i = 0; i < hsize; i++) { -		if (hash_count[i] < HASH_LIMIT) +		int acc; + +		if (hash_count[i] <= HASH_LIMIT)  			continue; + +		entries -= hash_count[i] - HASH_LIMIT; +		/* We leave exactly HASH_LIMIT entries in the bucket */ +  		entry = hash[i]; +		acc = 0;  		do { -			struct unpacked_index_entry *keep = entry; -			int skip = hash_count[i] / HASH_LIMIT; -			do { -				--entries; -				entry = entry->next; -			} while(--skip && entry); -			++entries; -			keep->next = entry; -		} while(entry); +			acc += hash_count[i] - HASH_LIMIT; +			if (acc > 0) { +				struct unpacked_index_entry *keep = entry; +				do { +					entry = entry->next; +					acc -= HASH_LIMIT; +				} while (acc > 0); +				keep->next = entry->next; +			} +			entry = entry->next; +		} while (entry); + +		/* Assume that this loop is gone through exactly +		 * HASH_LIMIT times and is entered and left with +		 * acc==0.  So the first statement in the loop +		 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT +		 * to the accumulator, and the inner loop consequently +		 * is run (hash_count[i]-HASH_LIMIT) times, removing +		 * one element from the list each time.  Since acc +		 * balances out to 0 at the final run, the inner loop +		 * body can't be left with entry==NULL.  So we indeed +		 * encounter entry==NULL in the outer loop only. +		 */  	}  	free(hash_count); | 
