#ifndef __DHASH_H__ #define __DHASH_H__ /* A special hash-type for use in part(). It is a store-only * hash in that all key/value pairs are put into the hash. Then it is sorted by * keys ascending with dhash_sort_final where the empty elements come at the end * of the internal array. This need for sorting is actually what prevents us from * using a dhash-based implementaion right now as it is the bottleneck for cases * with many very small partitions. * * It doesn't use a linked list for collision recovery. Instead, on collision it will * walk right in the array to find the first free spot. This search should never take * too long as it uses a fairly good integer-hash function. * * The 'step' parameter isn't currently used. */ #include /* for qsort() */ #define INITIAL_SIZE 4 typedef unsigned int hash_t; typedef struct { int key; AV *val; } dhash_val_t; typedef struct { int max; int size; int count; int step; dhash_val_t *ary; } dhash_t; void dhash_dump(dhash_t *h); int cmp (dhash_val_t *a, dhash_val_t *b) { /* all empty buckets should be at the end of the array */ if (!a->val) return 1; if (!b->val) return -1; return a->key - b->key; } dhash_t * dhash_init() { dhash_t *h; New(0, h, 1, dhash_t); Newz(0, h->ary, INITIAL_SIZE, dhash_val_t); h->max = 0; h->size = INITIAL_SIZE; h->count = 0; return h; } void dhash_destroy(dhash_t *h) { Safefree(h->ary); Safefree(h); } inline hash_t HASH(register hash_t k) { k += (k << 12); k ^= (k >> 22); k += (k << 4); k ^= (k >> 9); k += (k << 10); k ^= (k >> 2); k += (k << 7); k ^= (k >> 12); return k; } void dhash_insert(dhash_t *h, int key, SV *sv, register hash_t hash) { while (h->ary[hash].val && h->ary[hash].key != key) hash = (hash + 1) % h->size; if (!h->ary[hash].val) { h->ary[hash].val = newAV(); h->ary[hash].key = key; h->count++; } av_push(h->ary[hash].val, sv); SvREFCNT_inc(sv); } void dhash_resize(dhash_t *h) { register int i; register hash_t hash; dhash_val_t *old = h->ary; h->size <<= 1; h->count = 0; Newz(0, h->ary, h->size, dhash_val_t); for (i = 0; i < h->size>>1; ++i) { if (!old[i].val) continue; hash = HASH(old[i].key) % h->size; while (h->ary[hash].val) hash = (hash + 1) % h->size; h->ary[hash] = old[i]; ++h->count; } Safefree(old); } void dhash_store(dhash_t *h, int key, SV *val) { hash_t hash; if ((double)h->count / (double)h->size > 0.75) dhash_resize(h); hash = HASH(key) % h->size; dhash_insert(h, key, val, hash); if (key > h->max) h->max = key; } /* Once this is called, the hash is no longer useable. The only thing * that may be done with it is iterate over h->ary to get the values * sorted by keys */ void dhash_sort_final(dhash_t *h) { qsort(h->ary, h->size, sizeof(dhash_val_t), (int(*)(const void*,const void*))cmp); } void dhash_dump(dhash_t *h) { int i; fprintf(stderr, "max=%i, size=%i, count=%i, ary=%p\n", h->max, h->size, h->count, h->ary); for (i = 0; i < h->size; i++) { fprintf(stderr, "%2i: key=%-5i => val=(AV*)%p\n", i, h->ary[i].key, h->ary[i].val); } } #endif