summaryrefslogtreecommitdiff
path: root/dhash.h
blob: 63b5f6b4364568085fe7c27c911840cb2b69cf2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#ifndef __DHASH_H__
#define __DHASH_H__

/* A special hash-type for use in part(). It is a store-only
 * hash in that all key/value pairs are put into the hash. Then it is sorted by
 * keys ascending with dhash_sort_final where the empty elements come at the end
 * of the internal array. This need for sorting is actually what prevents us from
 * using a dhash-based implementaion right now as it is the bottleneck for cases
 * with many very small partitions.
 *
 * It doesn't use a linked list for collision recovery. Instead, on collision it will
 * walk right in the array to find the first free spot. This search should never take
 * too long as it uses a fairly good integer-hash function.
 *
 * The 'step' parameter isn't currently used. 
 */

#include <stdlib.h>	/* for qsort() */

#define INITIAL_SIZE 4

typedef unsigned int hash_t;

typedef struct {
    int key;
    AV *val;
} dhash_val_t;

typedef struct {
    int max;
    int size;
    int count;
    int step;
    dhash_val_t *ary;
} dhash_t;

void dhash_dump(dhash_t *h);

int cmp (dhash_val_t *a, dhash_val_t *b) {
    /* all empty buckets should be at the end of the array */
    if (!a->val)
	return 1;
    if (!b->val)
	return -1;
    return a->key - b->key;
}

dhash_t * dhash_init() {
    dhash_t *h;
    New(0, h, 1, dhash_t);
    Newz(0, h->ary, INITIAL_SIZE, dhash_val_t);
    h->max = 0;
    h->size = INITIAL_SIZE;
    h->count = 0;
    return h;
}

void dhash_destroy(dhash_t *h) {
    Safefree(h->ary);
    Safefree(h);
}

inline hash_t HASH(register hash_t k) {
    k += (k << 12);
    k ^= (k >> 22);
    k += (k << 4);
    k ^= (k >> 9);
    k += (k << 10);
    k ^= (k >> 2);
    k += (k << 7);
    k ^= (k >> 12);
    return k;
}

void dhash_insert(dhash_t *h, int key, SV *sv, register hash_t hash) {

    while (h->ary[hash].val && h->ary[hash].key != key)
	hash = (hash + 1) % h->size;
    
    if (!h->ary[hash].val) {
	h->ary[hash].val = newAV();
	h->ary[hash].key = key;
	h->count++;
    }
    
    av_push(h->ary[hash].val, sv);
    SvREFCNT_inc(sv);
}

void dhash_resize(dhash_t *h) {
    
    register int i;
    register hash_t hash;
    dhash_val_t *old = h->ary;
    
    h->size <<= 1;
    h->count = 0;
    Newz(0, h->ary, h->size, dhash_val_t);
    
    for (i = 0; i < h->size>>1; ++i) {
	if (!old[i].val)
	    continue;
	hash = HASH(old[i].key) % h->size;
	while (h->ary[hash].val)
	    hash = (hash + 1) % h->size;
	h->ary[hash] = old[i];
	++h->count;
    }
    Safefree(old);
}

void dhash_store(dhash_t *h, int key, SV *val) {
    hash_t hash;
    if ((double)h->count / (double)h->size > 0.75)
	dhash_resize(h);
    hash = HASH(key) % h->size;
    dhash_insert(h, key, val, hash);
    if (key > h->max)
	h->max = key;
}

/* Once this is called, the hash is no longer useable. The only thing
 * that may be done with it is iterate over h->ary to get the values
 * sorted by keys */
void dhash_sort_final(dhash_t *h) {
    qsort(h->ary, h->size, sizeof(dhash_val_t), (int(*)(const void*,const void*))cmp);
}

void dhash_dump(dhash_t *h) {
    int i;
    fprintf(stderr, "max=%i, size=%i, count=%i, ary=%p\n", h->max, h->size, h->count, h->ary);
    for (i = 0; i < h->size; i++) {
	fprintf(stderr, "%2i:  key=%-5i => val=(AV*)%p\n", i, h->ary[i].key, h->ary[i].val);
    }
}

#endif