summaryrefslogtreecommitdiff
path: root/erts/emulator/beam/erl_map.h
diff options
context:
space:
mode:
authorSverker Eriksson <sverker@erlang.org>2023-03-31 22:53:00 +0200
committerSverker Eriksson <sverker@erlang.org>2023-04-26 18:27:56 +0200
commite1044e4213f7f793c5ff2380081766954517d6f9 (patch)
treeb9edf1c0eb5142f18b68894f0ccd61fcaad07eda /erts/emulator/beam/erl_map.h
parent2912a1899deaaa032f128bda11b8277f59b1930e (diff)
downloaderlang-e1044e4213f7f793c5ff2380081766954517d6f9.tar.gz
erts: Implement hashmap collision nodes
Restrict depth of hashmap tree to 8 levels. Instead of rehashing with salt, give up and put colliding keys in "collision nodes" at the bottom of the tree. Collision nodes are normal tuples of arity 2 or larger. The elements of collision nodes are key-value cons cells like the other nodes, but they are sorted in map-key order. We do linear search in them, but that's ok as they should be small and rare in practice. Why? We had some scary encounter with forever colliding term pairs in make_internal_hash(). Even though that has been fixed we will sleep better at night knowing the hashmaps may not recurse forever draining all memory of the machine.
Diffstat (limited to 'erts/emulator/beam/erl_map.h')
-rw-r--r--erts/emulator/beam/erl_map.h30
1 files changed, 22 insertions, 8 deletions
diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h
index b451bad7e1..9ecacb8aa0 100644
--- a/erts/emulator/beam/erl_map.h
+++ b/erts/emulator/beam/erl_map.h
@@ -56,17 +56,15 @@ typedef struct flatmap_s {
/* the head-node is a bitmap or array with an untagged size */
#define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size)
-#define hashmap_make_hash(Key) make_map_hash(Key, 0)
+#define hashmap_make_hash(Key) make_map_hash(Key)
#define hashmap_restore_hash(Lvl, Key) \
- (((Lvl) < 8) ? \
- hashmap_make_hash(Key) >> (4*(Lvl)) : \
- make_map_hash(Key, ((Lvl) >> 3)) >> (4 * ((Lvl) & 7)))
+ (ASSERT(Lvl < 8), \
+ hashmap_make_hash(Key) >> (4*(Lvl)))
#define hashmap_shift_hash(Hx, Lvl, Key) \
- (((++(Lvl)) & 7) ? \
- (Hx) >> 4 : \
- make_map_hash(Key, ((Lvl) >> 3)))
+ (++(Lvl), ASSERT(Lvl <= HAMT_MAX_LEVEL), /* we allow one level too much */\
+ (Hx) >> 4)
/* erl_term.h stuff */
#define flatmap_get_values(x) (((Eterm *)(x)) + sizeof(flatmap_t)/sizeof(Eterm))
@@ -147,7 +145,8 @@ typedef struct hashmap_head_s {
/* erl_map.h stuff */
-#define is_hashmap_header_head(x) ((MAP_HEADER_TYPE(x) & (0x2)))
+#define is_hashmap_header_head(x) (MAP_HEADER_TYPE(x) & (0x2))
+#define is_hashmap_header_node(x) (MAP_HEADER_TYPE(x) == 1)
#define MAKE_MAP_HEADER(Type,Arity,Val) \
(_make_header(((((Uint16)(Val)) << MAP_HEADER_ARITY_SZ) | (Arity)) << MAP_HEADER_TAG_SZ | (Type) , _TAG_HEADER_MAP))
@@ -164,12 +163,21 @@ typedef struct hashmap_head_s {
#define MAP_HEADER_HAMT_NODE_BITMAP(Bmp) \
MAKE_MAP_HEADER(MAP_HEADER_TAG_HAMT_NODE_BITMAP,0x0,Bmp)
+#define MAP_HEADER_HAMT_COLLISION_NODE(Arity) make_arityval(Arity)
+
#define MAP_HEADER_FLATMAP_SZ (sizeof(flatmap_t) / sizeof(Eterm))
#define HAMT_NODE_ARRAY_SZ (17)
#define HAMT_HEAD_ARRAY_SZ (18)
#define HAMT_NODE_BITMAP_SZ(n) (1 + n)
#define HAMT_HEAD_BITMAP_SZ(n) (2 + n)
+#define HAMT_COLLISION_NODE_SZ(n) (1 + n)
+/*
+ * Collision nodes are used when all hash bits have been exhausted.
+ * They are normal tuples of arity 2 or larger. The elements of a collision
+ * node tuple contain key-value cons cells like the other nodes,
+ * but they are sorted in map-key order.
+ */
/* 2 bits maps tag + 4 bits subtag + 2 ignore bits */
#define _HEADER_MAP_SUBTAG_MASK (0xfc)
@@ -183,11 +191,17 @@ typedef struct hashmap_head_s {
#define hashmap_index(hash) (((Uint32)hash) & 0xf)
+#define HAMT_MAX_LEVEL 8
+
/* hashmap heap size:
[one cons cell + one list term in parent node] per key
[one header + one boxed term in parent node] per inner node
[one header + one size word] for root node
Observed average number of nodes per key is about 0.35.
+
+ Amendment: This size estimation does not take collision nodes into account.
+ It should be good enough though, as collision nodes are rare
+ and only make the size smaller compared to unlimited HAMT depth.
*/
#define HASHMAP_WORDS_PER_KEY 3
#define HASHMAP_WORDS_PER_NODE 2