summaryrefslogtreecommitdiff
path: root/vendor/nunicode/include/libnu/mph.h
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/nunicode/include/libnu/mph.h')
-rw-r--r--vendor/nunicode/include/libnu/mph.h71
1 files changed, 71 insertions, 0 deletions
diff --git a/vendor/nunicode/include/libnu/mph.h b/vendor/nunicode/include/libnu/mph.h
new file mode 100644
index 0000000000..53f2043ad1
--- /dev/null
+++ b/vendor/nunicode/include/libnu/mph.h
@@ -0,0 +1,71 @@
+#ifndef NU_MPH_H
+#define NU_MPH_H
+
+/* Intentionally undocumented
+ *
+ * http://iswsa.acm.org/mphf/index.html
+ */
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <libnu/config.h>
+
+#if defined (__cplusplus) || defined (c_plusplus)
+extern "C" {
+#endif
+
+#ifdef NU_WITH_UDB
+
+/* those need to be the same values as used in MPH generation */
+#define PRIME 0x01000193
+
+/** Calculate G offset from codepoint
+ */
+static inline
+uint32_t _nu_hash(uint32_t hash, uint32_t codepoint) {
+ if (hash == 0) {
+ hash = PRIME;
+ }
+
+ return hash ^ codepoint;
+}
+
+/** Get hash value of Unicode codepoint
+ */
+static inline
+uint32_t nu_mph_hash(const int16_t *G, size_t G_SIZE,
+ uint32_t codepoint) {
+
+ uint32_t h = _nu_hash(0, codepoint);
+ int16_t offset = G[h % G_SIZE];
+ if (offset < 0) {
+ return (uint32_t)(-offset - 1);
+ }
+ return (_nu_hash(offset, codepoint) % G_SIZE);
+}
+
+/** Lookup value in MPH
+ */
+static inline
+uint32_t nu_mph_lookup(const uint32_t *V_C, const uint16_t *V_I,
+ uint32_t codepoint, uint32_t hash) {
+
+ const uint32_t *c = (V_C + hash);
+ const uint16_t *i = (V_I + hash);
+
+ /* due to nature of minimal perfect hash, it will always
+ * produce collision for codepoints outside of MPH original set.
+ * thus VALUES_C contain original codepoint to check if
+ * collision occurred */
+
+ return (*c != codepoint ? 0 : *i);
+}
+
+#endif /* NU_WITH_UDB */
+
+#if defined (__cplusplus) || defined (c_plusplus)
+}
+#endif
+
+#endif /* NU_MPH_H */