blob: 53f2043ad1f8742283e2d4edeac84bae8ea9d017 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
#ifndef NU_MPH_H
#define NU_MPH_H
/* Intentionally undocumented
*
* http://iswsa.acm.org/mphf/index.html
*/
#include <stdint.h>
#include <sys/types.h>
#include <libnu/config.h>
#if defined (__cplusplus) || defined (c_plusplus)
extern "C" {
#endif
#ifdef NU_WITH_UDB
/* those need to be the same values as used in MPH generation */
#define PRIME 0x01000193
/** Calculate G offset from codepoint
*/
static inline
uint32_t _nu_hash(uint32_t hash, uint32_t codepoint) {
if (hash == 0) {
hash = PRIME;
}
return hash ^ codepoint;
}
/** Get hash value of Unicode codepoint
*/
static inline
uint32_t nu_mph_hash(const int16_t *G, size_t G_SIZE,
uint32_t codepoint) {
uint32_t h = _nu_hash(0, codepoint);
int16_t offset = G[h % G_SIZE];
if (offset < 0) {
return (uint32_t)(-offset - 1);
}
return (_nu_hash(offset, codepoint) % G_SIZE);
}
/** Lookup value in MPH
*/
static inline
uint32_t nu_mph_lookup(const uint32_t *V_C, const uint16_t *V_I,
uint32_t codepoint, uint32_t hash) {
const uint32_t *c = (V_C + hash);
const uint16_t *i = (V_I + hash);
/* due to nature of minimal perfect hash, it will always
* produce collision for codepoints outside of MPH original set.
* thus VALUES_C contain original codepoint to check if
* collision occurred */
return (*c != codepoint ? 0 : *i);
}
#endif /* NU_WITH_UDB */
#if defined (__cplusplus) || defined (c_plusplus)
}
#endif
#endif /* NU_MPH_H */
|