summaryrefslogtreecommitdiff
path: root/vendor/nunicode/include/libnu/mph.h
blob: 53f2043ad1f8742283e2d4edeac84bae8ea9d017 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#ifndef NU_MPH_H
#define NU_MPH_H

/* Intentionally undocumented
 *
 * http://iswsa.acm.org/mphf/index.html
 */

#include <stdint.h>
#include <sys/types.h>

#include <libnu/config.h>

#if defined (__cplusplus) || defined (c_plusplus)
extern "C" {
#endif

#ifdef NU_WITH_UDB

/* those need to be the same values as used in MPH generation */
#define PRIME        0x01000193

/** Calculate G offset from codepoint
 */
static inline
uint32_t _nu_hash(uint32_t hash, uint32_t codepoint) {
	if (hash == 0) {
		hash = PRIME;
	}

	return hash ^ codepoint;
}

/** Get hash value of Unicode codepoint
 */
static inline
uint32_t nu_mph_hash(const int16_t *G, size_t G_SIZE,
	uint32_t codepoint) {

	uint32_t h = _nu_hash(0, codepoint);
	int16_t offset = G[h % G_SIZE];
	if (offset < 0) {
		return (uint32_t)(-offset - 1);
	}
	return (_nu_hash(offset, codepoint) % G_SIZE);
}

/** Lookup value in MPH
 */
static inline
uint32_t nu_mph_lookup(const uint32_t *V_C, const uint16_t *V_I,
	uint32_t codepoint, uint32_t hash) {

	const uint32_t *c = (V_C + hash);
	const uint16_t *i = (V_I + hash);

	/* due to nature of minimal perfect hash, it will always
	 * produce collision for codepoints outside of MPH original set.
	 * thus VALUES_C contain original codepoint to check if
	 * collision occurred */

	return (*c != codepoint ? 0 : *i);
}

#endif /* NU_WITH_UDB */

#if defined (__cplusplus) || defined (c_plusplus)
}
#endif

#endif /* NU_MPH_H */