author     Sverker Eriksson <sverker@erlang.org>    2023-03-31 22:53:00 +0200
committer  Sverker Eriksson <sverker@erlang.org>    2023-04-26 18:27:56 +0200
commit     e1044e4213f7f793c5ff2380081766954517d6f9 (patch)
tree       b9edf1c0eb5142f18b68894f0ccd61fcaad07eda
parent     2912a1899deaaa032f128bda11b8277f59b1930e (diff)
download   erlang-e1044e4213f7f793c5ff2380081766954517d6f9.tar.gz
erts: Implement hashmap collision nodes
Restrict depth of hashmap tree to 8 levels. Instead of rehashing with salt,
give up and put colliding keys in "collision nodes" at the bottom of the tree.

Collision nodes are normal tuples of arity 2 or larger. The elements of
collision nodes are key-value cons cells like the other nodes, but they are
sorted in map-key order. We do linear search in them, but that's ok as they
should be small and rare in practice.

Why? We had some scary encounter with forever colliding term pairs in
make_internal_hash(). Even though that has been fixed, we will sleep better
at night knowing the hashmaps may not recurse forever, draining all memory
of the machine.
-rw-r--r--  erts/emulator/beam/erl_bif_info.c    7
-rw-r--r--  erts/emulator/beam/erl_db_util.c    99
-rw-r--r--  erts/emulator/beam/erl_map.c       654
-rw-r--r--  erts/emulator/beam/erl_map.h        30
-rw-r--r--  erts/emulator/beam/erl_utils.h       8
-rw-r--r--  erts/emulator/beam/external.c       49
-rw-r--r--  erts/emulator/beam/utils.c         111
-rw-r--r--  erts/emulator/test/map_SUITE.erl    70
8 files changed, 680 insertions, 348 deletions
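For intuition before the diff: a collision node, as described in the message
above, is nothing more than a key-sorted array of key-value pairs that is
searched linearly. A minimal standalone sketch of that behavior (illustrative
C, not ERTS code; all names and types here are invented):

#include <stdio.h>
#include <string.h>

struct kv { const char *key; int val; };

/* Linear search in a key-sorted collision node; the sort order
 * allows an early exit once we have passed the insertion point. */
static const struct kv *coll_lookup(const struct kv *node, int arity,
                                    const char *key)
{
    for (int i = 0; i < arity; i++) {
        int c = strcmp(key, node[i].key);
        if (c == 0)
            return &node[i];
        if (c < 0)
            break;
    }
    return NULL;
}

int main(void)
{
    /* a "tuple of arity 3": elements kept in map-key order */
    struct kv node[3] = { {"apple", 1}, {"mango", 2}, {"pear", 3} };
    const struct kv *hit = coll_lookup(node, 3, "mango");
    printf("%s -> %d\n", hit->key, hit->val); /* mango -> 2 */
    return 0;
}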
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index d5e598515c..6cb383436b 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -4149,6 +4149,13 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1)
else if (ERTS_IS_ATOM_STR("persistent_term", BIF_ARG_1)) {
BIF_RET(erts_debug_persistent_term_xtra_info(BIF_P));
}
+ else if (ERTS_IS_ATOM_STR("hashmap_collision_bonanza", BIF_ARG_1)) {
+#ifdef DBG_HASHMAP_COLLISION_BONANZA
+ return am_true;
+#else
+ return am_false;
+#endif
+ }
}
else if (is_tuple(BIF_ARG_1)) {
Eterm* tp = tuple_val(BIF_ARG_1);
diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c
index 8bace68d0f..9162329c8a 100644
--- a/erts/emulator/beam/erl_db_util.c
+++ b/erts/emulator/beam/erl_db_util.c
@@ -878,8 +878,8 @@ static Eterm dmc_lookup_bif_reversed(void *f);
static int cmp_uint(void *a, void *b);
static int cmp_guard_bif(void *a, void *b);
static int match_compact(ErlHeapFragment *expr, DMCErrInfo *err_info);
-static Uint my_size_object(Eterm t);
-static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap);
+static Uint my_size_object(Eterm t, int is_hashmap_node);
+static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap, int is_hashmap_node);
/* Guard subroutines */
static void
@@ -3786,11 +3786,11 @@ static void do_emit_constant(DMCContext *context, DMC_STACK_TYPE(UWord) *text,
if (is_immed(t)) {
tmp = t;
} else {
- sz = my_size_object(t);
+ sz = my_size_object(t, 0);
if (sz) {
emb = new_message_buffer(sz);
hp = emb->mem;
- tmp = my_copy_struct(t,&hp,&(emb->off_heap));
+ tmp = my_copy_struct(t,&hp,&(emb->off_heap), 0);
emb->next = context->save;
context->save = emb;
}
@@ -5251,33 +5251,39 @@ static int match_compact(ErlHeapFragment *expr, DMCErrInfo *err_info)
/*
** Simple size object that takes care of function calls and constant tuples
*/
-static Uint my_size_object(Eterm t)
+static Uint my_size_object(Eterm t, int is_hashmap_node)
{
Uint sum = 0;
- Eterm tmp;
Eterm *p;
switch (t & _TAG_PRIMARY_MASK) {
case TAG_PRIMARY_LIST:
- sum += 2 + my_size_object(CAR(list_val(t))) +
- my_size_object(CDR(list_val(t)));
+ sum += 2 + my_size_object(CAR(list_val(t)), 0) +
+ my_size_object(CDR(list_val(t)), 0);
break;
case TAG_PRIMARY_BOXED:
if (is_tuple(t)) {
- if (tuple_val(t)[0] == make_arityval(1) && is_tuple(tmp = tuple_val(t)[1])) {
- Uint i,n;
- p = tuple_val(tmp);
- n = arityval(p[0]);
- sum += 1 + n;
- for (i = 1; i <= n; ++i)
- sum += my_size_object(p[i]);
- } else if (tuple_val(t)[0] == make_arityval(2) &&
- is_atom(tmp = tuple_val(t)[1]) &&
- tmp == am_const) {
+ Eterm* tpl = tuple_val(t);
+ Uint i,n;
+
+ if (is_hashmap_node) {
+ /* hashmap collision node, no matchspec syntax here */
+ }
+ else if (tpl[0] == make_arityval(1) && is_tuple(tpl[1])) {
+ tpl = tuple_val(tpl[1]);
+ }
+ else if (tpl[0] == make_arityval(2) && tpl[1] == am_const) {
sum += size_object(tuple_val(t)[2]);
- } else {
+ break;
+ }
+ else {
erts_exit(ERTS_ERROR_EXIT,"Internal error, sizing unrecognized object in "
"(d)ets:match compilation.");
}
+
+ n = arityval(tpl[0]);
+ sum += 1 + n;
+ for (i = 1; i <= n; ++i)
+ sum += my_size_object(tpl[i], 0);
break;
} else if (is_map(t)) {
if (is_flatmap(t)) {
@@ -5290,7 +5296,7 @@ static Uint my_size_object(Eterm t)
n = arityval(p[0]);
sum += 1 + n;
for (int i = 1; i <= n; ++i)
- sum += my_size_object(p[i]);
+ sum += my_size_object(p[i], 0);
/* Calculate size of values */
p = (Eterm *)mp;
@@ -5298,18 +5304,19 @@ static Uint my_size_object(Eterm t)
sum += n + 3;
p += 3; /* hdr + size + keys words */
while (n--) {
- sum += my_size_object(*p++);
+ sum += my_size_object(*p++, 0);
}
} else {
Eterm *head = (Eterm *)hashmap_val(t);
Eterm hdr = *head;
Uint sz;
+
sz = hashmap_bitcount(MAP_HEADER_VAL(hdr));
sum += 1 + sz + header_arity(hdr);
head += 1 + header_arity(hdr);
while(sz-- > 0) {
- sum += my_size_object(head[sz]);
+ sum += my_size_object(head[sz], 1);
}
}
break;
@@ -5322,42 +5329,50 @@ static Uint my_size_object(Eterm t)
return sum;
}
-static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap)
+static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap,
+ int is_hashmap_node)
{
Eterm ret = NIL, a, b;
Eterm *p;
Uint sz;
switch (t & _TAG_PRIMARY_MASK) {
case TAG_PRIMARY_LIST:
- a = my_copy_struct(CAR(list_val(t)), hp, off_heap);
- b = my_copy_struct(CDR(list_val(t)), hp, off_heap);
+ a = my_copy_struct(CAR(list_val(t)), hp, off_heap, 0);
+ b = my_copy_struct(CDR(list_val(t)), hp, off_heap, 0);
ret = CONS(*hp, a, b);
*hp += 2;
break;
case TAG_PRIMARY_BOXED:
if (is_tuple(t)) {
- if (tuple_val(t)[0] == make_arityval(1) &&
- is_tuple(a = tuple_val(t)[1])) {
- Uint i,n;
- Eterm *savep = *hp;
- ret = make_tuple(savep);
- p = tuple_val(a);
- n = arityval(p[0]);
- *hp += n + 1;
- *savep++ = make_arityval(n);
- for(i = 1; i <= n; ++i)
- *savep++ = my_copy_struct(p[i], hp, off_heap);
+ Eterm* tpl = tuple_val(t);
+ Uint i,n;
+ Eterm *savep;
+
+ if (is_hashmap_node) {
+ /* hashmap collision node, no matchspec syntax here */
+ }
+ else if (tpl[0] == make_arityval(1) && is_tuple(tpl[1])) {
+ /* A {{...}} expression */
+ tpl = tuple_val(tpl[1]);
}
- else if (tuple_val(t)[0] == make_arityval(2) &&
- tuple_val(t)[1] == am_const) {
+ else if (tpl[0] == make_arityval(2) && tpl[1] == am_const) {
/* A {const, XXX} expression */
- b = tuple_val(t)[2];
+ b = tpl[2];
sz = size_object(b);
ret = copy_struct(b,sz,hp,off_heap);
+ break;
} else {
erts_exit(ERTS_ERROR_EXIT, "Trying to constant-copy non constant expression "
"0x%bex in (d)ets:match compilation.", t);
}
+ savep = *hp;
+ ret = make_tuple(savep);
+ n = arityval(tpl[0]);
+ *hp += n + 1;
+ *savep++ = tpl[0];
+ for(i = 1; i <= n; ++i)
+ *savep++ = my_copy_struct(tpl[i], hp, off_heap, 0);
+
} else if (is_map(t)) {
if (is_flatmap(t)) {
Uint i,n;
@@ -5375,7 +5390,7 @@ static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap)
*hp += n + 1;
*savep++ = make_arityval(n);
for(i = 1; i <= n; ++i)
- *savep++ = my_copy_struct(p[i], hp, off_heap);
+ *savep++ = my_copy_struct(p[i], hp, off_heap, 0);
savep = *hp;
ret = make_flatmap(savep);
@@ -5387,7 +5402,7 @@ static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap)
*savep++ = keys;
p += 3; /* hdr + size + keys words */
for (i = 0; i < n; i++)
- *savep++ = my_copy_struct(p[i], hp, off_heap);
+ *savep++ = my_copy_struct(p[i], hp, off_heap, 0);
erts_usort_flatmap((flatmap_t*)flatmap_val(ret));
} else {
Eterm *head = hashmap_val(t);
@@ -5404,7 +5419,7 @@ static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap)
*savep++ = *head++; /* map size */
for (int i = 0; i < sz; i++) {
- *savep++ = my_copy_struct(head[i],hp,off_heap);
+ *savep++ = my_copy_struct(head[i],hp,off_heap, 1);
}
}
} else {
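The is_hashmap_node flag threaded through my_size_object() and
my_copy_struct() above exists because match-spec compilation gives tuples
special meaning ({{...}} wraps a tuple constant, {const,X} wraps an arbitrary
constant), while a hashmap collision node is a bare tuple that must be
traversed verbatim. A toy sketch of just that dispatch (invented names, not
the real dmc machinery):

#include <stdio.h>

enum kind { WRAPPED_TUPLE, CONST_WRAPPER, BARE_TUPLE };

static const char *classify(enum kind k, int is_hashmap_node)
{
    if (is_hashmap_node)
        return "collision node: copy elements verbatim";
    switch (k) {
    case WRAPPED_TUPLE: return "{{...}}: unwrap one level";
    case CONST_WRAPPER: return "{const,X}: deep-copy X";
    default:            return "error: unrecognized object";
    }
}

int main(void)
{
    printf("%s\n", classify(BARE_TUPLE, 1));    /* inside a hashmap */
    printf("%s\n", classify(WRAPPED_TUPLE, 0)); /* matchspec syntax */
    return 0;
}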
diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c
index 34c4189fe9..88c74469ad 100644
--- a/erts/emulator/beam/erl_map.c
+++ b/erts/emulator/beam/erl_map.c
@@ -94,8 +94,8 @@ static Uint hashmap_subtree_size(Eterm node);
static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, Eterm node, Eterm *value);
static Eterm flatmap_from_validated_list(Process *p, Eterm list, Eterm fill_value, Uint size);
static Eterm hashmap_from_unsorted_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int reject_dupkeys, ErtsAlcType_t temp_memory_allocator);
-static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int is_root, ErtsAlcType_t temp_memory_allocator);
-static Eterm hashmap_from_chunked_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, Uint size, int is_root, ErtsAlcType_t temp_memory_allocator);
+static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, ErtsAlcType_t temp_memory_allocator);
+static Eterm hashmap_from_chunked_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, Uint size, ErtsAlcType_t temp_memory_allocator);
static Eterm hashmap_info(Process *p, Eterm node);
static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]);
static int hxnodecmp(const void* a, const void* b);
@@ -111,8 +111,7 @@ static int hxnodecmpkey(const void* a, const void* b);
#define maskval(V,L) (((V) >> ((7 - (L))*4)) & 0xf)
#define DBG_PRINT(X)
/*erts_printf X*/
-#define HALLOC_EXTRA_HASHMAP_FROM_CHUNKED_ARRAY 200
-#define HALLOC_EXTRA HALLOC_EXTRA_HASHMAP_FROM_CHUNKED_ARRAY
+#define HALLOC_EXTRA 200
/* *******************************
* ** Yielding C Fun (YCF) Note **
* *******************************
@@ -134,7 +133,6 @@ static int hxnodecmpkey(const void* a, const void* b);
#include "erl_map.ycf.h"
#endif
#define NOT_YCF_YIELDING_VERSION 1
-#undef HALLOC_EXTRA
#define YCF_CONSUME_REDS(X) while(0){}
void erts_init_map(void) {
@@ -270,6 +268,7 @@ BIF_RETTYPE map_get_2(BIF_ALIST_2) {
* means that the code has to follow some restrictions. See note about
* YCF near the top of the file for more information.
*/
+
#ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS
static BIF_RETTYPE maps_from_keys_2_helper(Process* p, Eterm* bif_args) {
Eterm list = bif_args[0];
@@ -765,7 +764,7 @@ static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory,
if (hxns[ix].hx == hxns[ix+1].hx) {
/* find region of equal hash values */
- jx = ix + 1;
+ jx = ix + 2;
while(jx < n && hxns[ix].hx == hxns[jx].hx) { jx++; }
/* find all correct keys from region
* (last in list but now hash sorted so we check highest id instead) */
@@ -808,7 +807,8 @@ static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory,
if (cx > 1) {
/* recursive decompose array */
- res = hashmap_from_sorted_unique_array(factory, hxns, cx, 0, temp_memory_allocator);
+ res = hashmap_from_sorted_unique_array(factory, hxns, cx,
+ temp_memory_allocator);
} else {
Eterm *hp;
@@ -835,51 +835,49 @@ static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory,
* YCF near the top of the file for more information.
*/
static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory,
- hxnode_t *hxns, Uint n, int lvl,
+ hxnode_t *hxns, Uint n,
ErtsAlcType_t temp_memory_allocator) {
Eterm res = NIL;
- Uint i;
Uint ix;
- Uint jx;
Uint elems;
- Uint32 sw;
- Uint32 hx;
- Eterm val;
hxnode_t *tmp = NULL;
- ASSERT(lvl < 32);
+
ix = 0;
elems = 1;
while (ix < n - 1) {
if (hxns[ix].hx == hxns[ix+1].hx) {
- jx = ix + 1;
- while (jx < n && hxns[ix].hx == hxns[jx].hx) { jx++; }
- tmp = (hxnode_t *)erts_alloc(temp_memory_allocator, ((jx - ix)) * sizeof(hxnode_t));
-
- for(i = 0; i < jx - ix; i++) {
- val = hxns[i + ix].val;
- hx = hashmap_restore_hash(lvl + 8, CAR(list_val(val)));
- swizzle32(sw,hx);
- tmp[i].hx = sw;
- tmp[i].val = val;
- tmp[i].i = i;
- tmp[i].skip = 1;
- }
+ Uint n_colliders;
+ Eterm* hp;
+ Eterm collision_node;
+ Uint jx = ix + 2;
+ Uint i;
+
+ while (jx < n && hxns[ix].hx == hxns[jx].hx)
+ jx++;
+
+ n_colliders = jx - ix;
+ hp = erts_produce_heap(factory, HAMT_COLLISION_NODE_SZ(n_colliders),
+ HALLOC_EXTRA);
+ collision_node = make_tuple(hp);
+
+ *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n_colliders);
+ for (i = 0; i < n_colliders; i++) {
+ *hp++ = hxns[ix + i].val;
+ ASSERT(i == 0
+ || CMP_TERM(CAR(list_val(hxns[ix+i-1].val)),
+ CAR(list_val(hxns[ix+i].val))) < 0);
+ }
- erts_qsort(tmp, jx - ix, sizeof(hxnode_t), hxnodecmp);
+ hxns[ix].val = collision_node;
+ hxns[ix].skip = n_colliders;
+ ix = jx;
- hxns[ix].skip = jx - ix;
- hxns[ix].val =
- hashmap_from_sorted_unique_array(factory, tmp, jx - ix, lvl + 8, temp_memory_allocator);
- erts_free(temp_memory_allocator, (void *) tmp);
- /* Memory management depend on the statement below */
- tmp = NULL;
- ix = jx;
- if (ix < n) { elems++; }
- continue;
+ if (ix < n) { elems++; }
+ continue;
}
- hxns[ix].skip = 1;
- elems++;
- ix++;
+ hxns[ix].skip = 1;
+ elems++;
+ ix++;
}
YCF_SPECIAL_CODE_START(ON_DESTROY_STATE);
{
@@ -890,14 +888,13 @@ static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory,
}
}
YCF_SPECIAL_CODE_END();
- res = hashmap_from_chunked_array(factory, hxns, elems, n, !lvl, temp_memory_allocator);
+ res = hashmap_from_chunked_array(factory, hxns, elems, n, temp_memory_allocator);
ERTS_FACTORY_HOLE_CHECK(factory);
return res;
}
-#define HALLOC_EXTRA HALLOC_EXTRA_HASHMAP_FROM_CHUNKED_ARRAY
/* **Important Note**
*
* A yielding version of this function is generated with YCF. This
@@ -905,7 +902,7 @@ static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory,
* YCF near the top of the file for more information.
*/
static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns, Uint n,
- Uint size, int is_root,
+ Uint size,
ErtsAlcType_t temp_memory_allocator) {
Uint ix;
Uint d;
@@ -957,16 +954,11 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns
}
slot = maskval(v,0);
- hp = erts_produce_heap(factory, (is_root ? 3 : 2), 0);
+ hp = erts_produce_heap(factory, 3, 0);
- if (is_root) {
- hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << slot);
- hp[1] = size;
- hp[2] = res;
- } else {
- hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << slot);
- hp[1] = res;
- }
+ hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << slot);
+ hp[1] = size;
+ hp[2] = res;
return make_hashmap(hp);
}
@@ -1128,15 +1120,11 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns
bp = 1 << slot;
hdr |= bp;
sz = hashmap_bitcount(hdr);
- hp = erts_produce_heap(factory, sz + /* hdr + item */ (is_root ? 2 : 1), 0);
+ hp = erts_produce_heap(factory, sz + /* hdr + item */ 2, 0);
nhp = hp;
- if (is_root) {
- *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_HEAD_ARRAY : MAP_HEADER_HAMT_HEAD_BITMAP(hdr);
- *hp++ = size;
- } else {
- *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hdr);
- }
+ *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_HEAD_ARRAY : MAP_HEADER_HAMT_HEAD_BITMAP(hdr);
+ *hp++ = size;
*hp++ = res; sz--;
while (sz--) { *hp++ = ESTACK_POP(stack); }
@@ -1148,7 +1136,6 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns
ERTS_FACTORY_HOLE_CHECK(factory);
return res;
}
-#undef HALLOC_EXTRA
static int hxnodecmpkey(const void *va, const void *vb) {
const hxnode_t *a = (const hxnode_t*) va;
@@ -1477,9 +1464,62 @@ BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1) {
(HashmapMergeContext*) ERTS_MAGIC_BIN_DATA(ctx_bin));
}
-#define HALLOC_EXTRA 200
#define MAP_MERGE_LOOP_FACTOR 8
+static Eterm merge_maybe_collision_node(Process* p,
+ Eterm* srcA, Uint szA,
+ Eterm* srcB, Uint szB,
+ Uint* map_sizep)
+{
+ Eterm *hp;
+ Eterm *hdr_ptr;
+ Eterm *hp_end;
+ Uint arity;
+
+ ERTS_ASSERT(szA >= 1 && szB >= 1);
+ arity = szA + szB;
+ hp = HAlloc(p, HAMT_COLLISION_NODE_SZ(arity));
+ hp_end = hp + HAMT_COLLISION_NODE_SZ(arity);
+ hdr_ptr = hp++;
+
+ while (szA && szB) {
+ Eterm keyA = CAR(list_val(*srcA));
+ Eterm keyB = CAR(list_val(*srcB));
+ const Sint key_cmp = CMP_TERM(keyA, keyB);
+
+ if (key_cmp < 0) {
+ *hp++ = *srcA++;
+ szA--;
+ }
+ else {
+ *hp++ = *srcB++;
+ szB--;
+ if (key_cmp == 0) {
+ srcA++;
+ szA--;
+ arity--;
+ (*map_sizep)--;
+ }
+ }
+ }
+ if (arity == 1) { /* no collision node needed */
+ Eterm ret_cons = hdr_ptr[1];
+ ASSERT(!szA && !szB);
+ HRelease(p, hp_end, hdr_ptr);
+ return ret_cons;
+ }
+
+ for ( ; szA; szA--)
+ *hp++ = *srcA++;
+ for ( ; szB; szB--)
+ *hp++ = *srcB++;
+
+ HRelease(p, hp_end, hp);
+ *hdr_ptr = make_arityval(arity);
+ return make_tuple(hdr_ptr);
+}
+
+
static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B,
int swap_args, HashmapMergeContext* ctx) {
#define PSTACK_TYPE struct HashmapMergePStackType
@@ -1493,6 +1533,7 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B,
Eterm trap_ret;
Sint initial_reds = (Sint) (ERTS_BIF_REDS_LEFT(p) * MAP_MERGE_LOOP_FACTOR);
Sint reds = initial_reds;
+ Uint coll_szA = 0, coll_szB = 0;
/*
* Strategy: Do depth-first traversal of both trees (at the same time)
@@ -1552,8 +1593,13 @@ recurse:
goto merge_nodes;
}
}
- hx = hashmap_restore_hash(ctx->lvl, keyA);
- sp->abm = 1 << hashmap_index(hx);
+ if (ctx->lvl < HAMT_MAX_LEVEL) {
+ hx = hashmap_restore_hash(ctx->lvl, keyA);
+ sp->abm = 1 << hashmap_index(hx);
+ }
+ else {
+ coll_szA = 1;
+ }
/* keep srcA pointing at the leaf */
}
else { /* A is NODE */
@@ -1562,25 +1608,35 @@ recurse:
ASSERT(is_header(hdrA));
switch (hdrA & _HEADER_MAP_SUBTAG_MASK) {
case HAMT_SUBTAG_HEAD_ARRAY: {
+ ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
sp->srcA++;
sp->abm = 0xffff;
break;
}
case HAMT_SUBTAG_HEAD_BITMAP: sp->srcA++;
case HAMT_SUBTAG_NODE_BITMAP: {
+ ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
sp->abm = MAP_HEADER_VAL(hdrA);
break;
}
- default:
- erts_exit(ERTS_ABORT_EXIT, "bad header %ld\r\n", hdrA);
+ default: /* collision node */
+ ERTS_ASSERT(is_arity_value(hdrA));
+ ASSERT(ctx->lvl == HAMT_MAX_LEVEL);
+ coll_szA = arityval(hdrA);
+ ASSERT(coll_szA >= 2);
}
}
if (is_list(sp->nodeB)) { /* B is LEAF */
Eterm keyB = CAR(list_val(sp->nodeB));
- hx = hashmap_restore_hash(ctx->lvl, keyB);
- sp->bbm = 1 << hashmap_index(hx);
+ if (ctx->lvl < HAMT_MAX_LEVEL) {
+ hx = hashmap_restore_hash(ctx->lvl, keyB);
+ sp->bbm = 1 << hashmap_index(hx);
+ }
+ else {
+ coll_szB = 1;
+ }
/* keep srcB pointing at the leaf */
}
else { /* B is NODE */
@@ -1589,17 +1645,22 @@ recurse:
ASSERT(is_header(hdrB));
switch (hdrB & _HEADER_MAP_SUBTAG_MASK) {
case HAMT_SUBTAG_HEAD_ARRAY: {
+ ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
sp->srcB++;
sp->bbm = 0xffff;
break;
}
case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++;
case HAMT_SUBTAG_NODE_BITMAP: {
+ ASSERT(ctx->lvl < HAMT_MAX_LEVEL);
sp->bbm = MAP_HEADER_VAL(hdrB);
break;
}
- default:
- erts_exit(ERTS_ABORT_EXIT, "bad header %ld\r\n", hdrB);
+ default: /* collision node */
+ ERTS_ASSERT(is_arity_value(hdrB));
+ ASSERT(ctx->lvl == HAMT_MAX_LEVEL);
+ coll_szB = arityval(hdrB);
+ ASSERT(coll_szB >= 2);
}
}
}
@@ -1623,13 +1684,22 @@ merge_nodes:
sp->srcA++;
sp->srcB++;
}
- } else { /* Start build a node */
+ }
+ else if (ctx->lvl < HAMT_MAX_LEVEL) { /* Start build a node */
sp->ix = 0;
sp->rbm = sp->abm | sp->bbm;
ASSERT(!(sp->rbm == 0 && ctx->lvl > 0));
}
+ else {
+ res = merge_maybe_collision_node(p, sp->srcA, coll_szA,
+ sp->srcB, coll_szB, &ctx->size);
+ sp->mix = 3;
+ coll_szA = coll_szB = 0;
+ continue;
+ }
if (--reds <= 0) {
+ ASSERT(!coll_szA && !coll_szB);
goto trap;
}
resume_from_trap:
@@ -1773,21 +1843,14 @@ static int hash_cmp(Uint32 ha, Uint32 hb)
int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp)
{
- unsigned int lvl = 0;
-
if (ap && bp) {
+ Uint32 ha, hb;
ASSERT(CMP_TERM(CAR(ap), CAR(bp)) != 0);
- for (;;) {
- Uint32 ha = hashmap_restore_hash(lvl, CAR(ap));
- Uint32 hb = hashmap_restore_hash(lvl, CAR(bp));
- int cmp = hash_cmp(ha, hb);
- if (cmp) {
- return cmp;
- }
- lvl += 8;
- }
+ ha = hashmap_make_hash(CAR(ap));
+ hb = hashmap_make_hash(CAR(bp));
+ return hash_cmp(ha, hb);
}
-
+ ASSERT(ap || bp);
return ap ? -1 : 1;
}
@@ -2220,7 +2283,9 @@ Uint hashmap_node_size(Eterm hdr, Eterm **nodep)
ASSERT(sz < 17);
break;
default:
- erts_exit(ERTS_ABORT_EXIT, "bad header");
+ ERTS_ASSERT(is_arity_value(hdr));
+ sz = arityval(hdr);
+ break;
}
return sz;
}
@@ -2314,7 +2379,7 @@ Eterm* hashmap_iterator_prev(ErtsWStack* s) {
const Eterm *
erts_hashmap_get(Uint32 hx, Eterm key, Eterm node)
{
- Eterm *ptr, hdr, *res;
+ Eterm *ptr, hdr;
Uint ix, lvl = 0;
Uint32 hval,bp;
@@ -2325,15 +2390,16 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node)
ASSERT(is_hashmap_header_head(hdr));
ptr++;
- for (;;) {
+ do {
+ ASSERT(lvl == 0 || is_hashmap_header_node(hdr));
+
hval = MAP_HEADER_VAL(hdr);
ix = hashmap_index(hx);
if (hval != 0xffff) {
bp = 1 << ix;
if (!(bp & hval)) {
/* not occupied */
- res = NULL;
- break;
+ return NULL;
}
ix = hashmap_bitcount(hval & (bp - 1));
}
@@ -2341,8 +2407,7 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node)
if (is_list(node)) { /* LEAF NODE [K|V] */
ptr = list_val(node);
- res = EQ(CAR(ptr), key) ? &(CDR(ptr)) : NULL;
- break;
+ return EQ(CAR(ptr), key) ? &(CDR(ptr)) : NULL;
}
hx = hashmap_shift_hash(hx,lvl,key);
@@ -2351,10 +2416,17 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node)
ptr = boxed_val(node);
hdr = *ptr;
ASSERT(is_header(hdr));
- ASSERT(!is_hashmap_header_head(hdr));
- }
+ } while (!is_arity_value(hdr));
- return res;
+ /* collision node */
+ ix = arityval(hdr);
+ ASSERT(ix > 1);
+ do {
+ Eterm* kv = list_val(*(++ptr));
+ if (EQ(CAR(kv), key))
+ return &(CDR(kv));
+ } while (--ix > 0);
+ return NULL;
}
Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value,
@@ -2368,6 +2440,8 @@ Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value,
/* We are putting a new value (under a new or existing key) */
hp = HAlloc(p, size);
res = erts_hashmap_insert_up(hp, key, value, upsz, &stack);
+ ASSERT(hashmap_val(res) + 2 + hashmap_bitcount(MAP_HEADER_VAL(*hashmap_val(res)))
+ == hp + size);
}
else {
/* We are putting the same key-value */
@@ -2475,9 +2549,34 @@ int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm value, Eterm node, Uint
}
size += HAMT_HEAD_BITMAP_SZ(n+1);
goto unroll;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header tag %ld\r\n", hdr & _HEADER_MAP_SUBTAG_MASK);
- break;
+ default:
+ ERTS_ASSERT(is_arity_value(hdr));
+ n = arityval(hdr);
+ ASSERT(n >= 2);
+ for (slot = 0; slot < n; slot++) {
+ Eterm* kv = list_val(ptr[1+slot]);
+ Sint c;
+ ckey = CAR(kv);
+ c = CMP_TERM(key, ckey);
+ if (c == 0) {
+ if (CDR(kv) == value) {
+ *sz = 0;
+ return 1;
+ }
+ *update_size = 0;
+ size += HAMT_COLLISION_NODE_SZ(n);
+ ESTACK_PUSH3(*sp, slot, 0, node);
+ goto unroll;
+ }
+ if (c < 0)
+ break;
+ }
+ if (is_update) {
+ return 0;
+ }
+ size += HAMT_COLLISION_NODE_SZ(n+1);
+ ESTACK_PUSH3(*sp, slot, 1, node);
+ goto unroll;
}
break;
default:
@@ -2486,21 +2585,25 @@ int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm value, Eterm node, Uint
}
}
insert_subnodes:
- clvl = lvl;
- chx = hashmap_restore_hash(clvl,ckey);
- size += HAMT_NODE_BITMAP_SZ(2);
- ix = hashmap_index(hx);
- cix = hashmap_index(chx);
-
- while (cix == ix) {
- ESTACK_PUSH4(*sp, 0, 1 << ix, 0, MAP_HEADER_HAMT_NODE_BITMAP(0));
- size += HAMT_NODE_BITMAP_SZ(1);
- hx = hashmap_shift_hash(hx,lvl,key);
- chx = hashmap_shift_hash(chx,clvl,ckey);
- ix = hashmap_index(hx);
- cix = hashmap_index(chx);
- }
- ESTACK_PUSH3(*sp, cix, ix, node);
+ if (lvl < HAMT_MAX_LEVEL) {
+ clvl = lvl;
+ chx = hashmap_restore_hash(clvl,ckey);
+ do {
+ ix = hashmap_index(hx);
+ cix = hashmap_index(chx);
+ if (cix != ix) {
+ size += HAMT_NODE_BITMAP_SZ(2);
+ ESTACK_PUSH4(*sp, cix, ix, 0, node);
+ goto unroll;
+ }
+ ESTACK_PUSH4(*sp, 0, 1 << ix, 0, MAP_HEADER_HAMT_NODE_BITMAP(0));
+ size += HAMT_NODE_BITMAP_SZ(1);
+ hx = hashmap_shift_hash(hx,lvl,key);
+ chx = hashmap_shift_hash(chx,clvl,ckey);
+ } while (lvl < HAMT_MAX_LEVEL);
+ }
+ size += HAMT_COLLISION_NODE_SZ(2);
+ ESTACK_PUSH2(*sp, 1, node);
unroll:
*sz = size + /* res cons */ 2;
@@ -2514,17 +2617,29 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value,
Eterm *nhp = NULL;
Uint32 ix, cix, bp, hval;
Uint slot, n;
- /* Needed for halfword */
- DeclareTmpHeapNoproc(fake,1);
- UseTmpHeapNoproc(1);
+ Eterm fake;
res = CONS(hp, key, value); hp += 2;
do {
node = ESTACK_POP(*sp);
switch(primary_tag(node)) {
- case TAG_PRIMARY_LIST:
- ix = (Uint32) ESTACK_POP(*sp);
+ case TAG_PRIMARY_LIST: {
+ const int is_collision_node = (int) ESTACK_POP(*sp);
+ if (is_collision_node) {
+ nhp = hp;
+ *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(2);
+ if (CMP_TERM(key, CAR(list_val(node))) < 0){
+ *hp++ = res;
+ *hp++ = node;
+ } else {
+ *hp++ = node;
+ *hp++ = res;
+ }
+ res = make_hashmap(nhp);
+ break;
+ }
+ ix = (Uint32)ESTACK_POP(*sp);
cix = (Uint32) ESTACK_POP(*sp);
nhp = hp;
@@ -2538,10 +2653,11 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value,
}
res = make_hashmap(nhp);
break;
+ }
case TAG_PRIMARY_HEADER:
/* subnodes, fake it */
- *fake = node;
- node = make_boxed(fake);
+ fake = node;
+ node = make_boxed(&fake);
case TAG_PRIMARY_BOXED:
ptr = boxed_val(node);
hdr = *ptr;
@@ -2594,9 +2710,27 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value,
}
res = make_hashmap(nhp);
break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header tag %x\r\n", hdr & _HEADER_MAP_SUBTAG_MASK);
- break;
+ default: {
+ int is_insert;
+ ERTS_ASSERT(is_arity_value(hdr));
+ n = arityval(hdr);
+ ASSERT(n >= 2);
+ is_insert = (int) ESTACK_POP(*sp);
+ slot = (Uint) ESTACK_POP(*sp);
+ nhp = hp;
+ n += is_insert;
+ *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n); ptr++;
+ ix = 0;
+ while (ix++ < slot)
+ *hp++ = *ptr++;
+ *hp++ = res;
+ if (!is_insert)
+ ptr++;
+ while (ix++ < n)
+ *hp++ = *ptr++;
+ res = make_hashmap(nhp);
+ break;
+ }
}
break;
default:
@@ -2775,9 +2909,22 @@ static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key,
/* not occupied */
res = THE_NON_VALUE;
goto not_found;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header tag %ld\r\n", hdr & _HEADER_MAP_SUBTAG_MASK);
- break;
+ default: /* collision node */
+ ERTS_ASSERT(is_arity_value(hdr));
+ n = arityval(hdr);
+ ASSERT(n >= 2);
+ for (slot = 0; slot < n; slot++) {
+ Eterm* kv = list_val(ptr[1+slot]);
+ if (EQ(key, CAR(kv))) {
+ if (value)
+ *value = CDR(kv);
+ ESTACK_PUSH2(stack, slot, node);
+ size += HAMT_COLLISION_NODE_SZ(n);
+ goto unroll;
+ }
+ }
+ res = THE_NON_VALUE;
+ goto not_found;
}
break;
default:
@@ -2959,8 +3106,25 @@ unroll:
}
res = make_hashmap(nhp);
break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header tag %x\r\n", hdr & _HEADER_MAP_SUBTAG_MASK);
+ default: /* collision node */
+ ERTS_ASSERT(is_arity_value(hdr));
+ n = arityval(hdr);
+ ASSERT(n >= 2);
+ slot = (Uint) ESTACK_POP(stack);
+ ASSERT(slot < n);
+ if (n > 2) { /* Shrink collision node */
+ nhp = hp;
+ *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n-1); ptr++;
+ n -= slot + 1;
+ while (slot--) { *hp++ = *ptr++; }
+ ptr++;
+ while(n--) { *hp++ = *ptr++; }
+ res = make_hashmap(nhp);
+ }
+ else { /* Collapse collision node */
+ ASSERT(res == THE_NON_VALUE);
+ res = ptr[1 + (1-slot)];
+ }
break;
}
} while(!ESTACK_ISEMPTY(stack));
@@ -3228,8 +3392,10 @@ BIF_RETTYPE erts_internal_map_hashmap_children_1(BIF_ALIST_1) {
sz = 16;
ptr += 2;
break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
+ default: /* collision node */
+ ERTS_ASSERT(is_arity_value(hdr));
+ sz = arityval(hdr);
+ ASSERT(sz >= 2);
break;
}
ASSERT(sz < 17);
@@ -3251,15 +3417,17 @@ static Eterm hashmap_info(Process *p, Eterm node) {
DECL_AM(leafs);
DECL_AM(bitmaps);
DECL_AM(arrays);
- Uint nleaf=0, nbitmap=0, narray=0;
- Uint bitmap_usage[16], leaf_usage[16];
- Uint lvl = 0, clvl;
+ DECL_AM(collisions);
+ Uint nleaf=0, nbitmap=0, narray=0, ncollision = 0;
+ Uint bitmap_usage[16];
+ Uint collision_usage[16];
+ Uint leaf_usage[HAMT_MAX_LEVEL + 2];
+ Uint max_depth = 0, clvl;
DECLARE_ESTACK(stack);
- for (sz = 0; sz < 16; sz++) {
- bitmap_usage[sz] = 0;
- leaf_usage[sz] = 0;
- }
+ sys_memzero(bitmap_usage, sizeof(bitmap_usage));
+ sys_memzero(collision_usage, sizeof(collision_usage));
+ sys_memzero(leaf_usage, sizeof(leaf_usage));
ptr = boxed_val(node);
ESTACK_PUSH(stack, 0);
@@ -3267,8 +3435,6 @@ static Eterm hashmap_info(Process *p, Eterm node) {
do {
node = ESTACK_POP(stack);
clvl = ESTACK_POP(stack);
- if (lvl < clvl)
- lvl = clvl;
switch(primary_tag(node)) {
case TAG_PRIMARY_LIST:
nleaf++;
@@ -3284,45 +3450,49 @@ static Eterm hashmap_info(Process *p, Eterm node) {
sz = hashmap_bitcount(MAP_HEADER_VAL(hdr));
ASSERT(sz < 17);
bitmap_usage[sz-1] += 1;
- while(sz--) {
- ESTACK_PUSH(stack, clvl + 1);
- ESTACK_PUSH(stack, ptr[sz+1]);
- }
break;
case HAMT_SUBTAG_HEAD_BITMAP:
nbitmap++;
sz = hashmap_bitcount(MAP_HEADER_VAL(hdr));
bitmap_usage[sz-1] += 1;
- while(sz--) {
- ESTACK_PUSH(stack, clvl + 1);
- ESTACK_PUSH(stack, ptr[sz+2]);
- }
+ ptr++;
break;
case HAMT_SUBTAG_HEAD_ARRAY:
narray++;
sz = 16;
- while(sz--) {
- ESTACK_PUSH(stack, clvl + 1);
- ESTACK_PUSH(stack, ptr[sz+2]);
- }
- break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
+ ptr++;
break;
+ default: /* collision node */
+ ERTS_ASSERT(is_arity_value(hdr));
+ ncollision++;
+ sz = arityval(hdr);
+ ASSERT(sz >= 2);
+ collision_usage[(sz > 16 ? 16 : sz) - 1] += 1;
+ break;
}
+ ASSERT(sz >= 1);
+ clvl++;
+ ASSERT(clvl <= HAMT_MAX_LEVEL+1);
+ if (max_depth < clvl)
+ max_depth = clvl;
+ while(sz--) {
+ ESTACK_PUSH(stack, clvl);
+ ESTACK_PUSH(stack, ptr[sz+1]);
+ }
}
} while(!ESTACK_ISEMPTY(stack));
/* size */
sz = 0;
- hashmap_bld_tuple_uint(NULL,&sz,16,leaf_usage);
- hashmap_bld_tuple_uint(NULL,&sz,16,bitmap_usage);
+ hashmap_bld_tuple_uint(NULL, &sz, HAMT_MAX_LEVEL+2, leaf_usage);
+ hashmap_bld_tuple_uint(NULL, &sz, 16, bitmap_usage);
+ hashmap_bld_tuple_uint(NULL, &sz, 16, collision_usage);
/* alloc */
- hp = HAlloc(p, 2+3 + 3*(2+4) + sz);
+ hp = HAlloc(p, 2+3 + 4*(2+4) + sz);
- info = hashmap_bld_tuple_uint(&hp,NULL,16,leaf_usage);
+ info = hashmap_bld_tuple_uint(&hp, NULL, HAMT_MAX_LEVEL+2, leaf_usage);
tup = TUPLE3(hp, AM_leafs, make_small(nleaf),info); hp += 4;
res = CONS(hp, tup, res); hp += 2;
@@ -3330,10 +3500,14 @@ static Eterm hashmap_info(Process *p, Eterm node) {
tup = TUPLE3(hp, AM_bitmaps, make_small(nbitmap), info); hp += 4;
res = CONS(hp, tup, res); hp += 2;
+ info = hashmap_bld_tuple_uint(&hp, NULL, 16, collision_usage);
+ tup = TUPLE3(hp, AM_collisions, make_small(ncollision), info); hp += 4;
+ res = CONS(hp, tup, res); hp += 2;
+
tup = TUPLE3(hp, AM_arrays, make_small(narray),NIL); hp += 4;
res = CONS(hp, tup, res); hp += 2;
- tup = TUPLE2(hp, AM_depth, make_small(lvl)); hp += 3;
+ tup = TUPLE2(hp, AM_depth, make_small(max_depth)); hp += 3;
res = CONS(hp, tup, res); hp += 2;
DESTROY_ESTACK(stack);
@@ -3361,12 +3535,16 @@ static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[])
* Since each hashmap node can only be up to 16 elements
* large we use 4 bits per level in the path.
*
- * So a Path with value 0x110 will first get the 0:th
+ * So a Path with value 0x210 will first get the 0:th
* slot in the head node, and then the 1:st slot in the
- * resulting node and then finally the 1:st slot in the
+ * resulting node and then finally the 2:nd slot in the
* node beneath. If that slot is not a leaf, then the path
* continues down the 0:th slot until it finds a leaf.
*
+ * Collision nodes may (theoretically and in debug) have more
+ * than 16 elements. To not complicate the 4-bit path format
+ * we avoid yielding in collision nodes.
+ *
* Once the leaf has been found, the return value is created
* by traversing the tree using the stack that was built
* when searching for the first leaf to return.
@@ -3455,7 +3633,12 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) {
Uint path_length = 0;
Uint *path_rest = NULL;
int i, elems, orig_elems;
- Eterm node = map, res, *patch_ptr = NULL, *hp;
+ Eterm node = map, res, *patch_ptr = NULL;
+ Eterm *hp = NULL;
+ Eterm *hp_end;
+ Eterm *ptr;
+ Uint sz, words_per_elem;
+ Uint idx;
/* A stack WSTACK is used when traversing the hashmap.
* It contains: node, idx, sz, ptr
@@ -3513,61 +3696,29 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) {
BIF_ERROR(BIF_P, BADARG);
}
- if (type == iterator) {
- /*
- * Iterator uses the format {K1, V1, {K2, V2, {K3, V3, [Path | Map]}}},
- * so each element is 4 words large.
- * To make iteration order independent of input reductions
- * the KV-pairs are here built in DESTRUCTIVE non-reverse order.
- */
- hp = HAlloc(BIF_P, 4 * elems);
- } else {
- /*
- * List used the format [Path, Map, {K3,V3}, {K2,V2}, {K1,V1} | BIF_ARG_3],
- * so each element is 2+3 words large.
- * To make list order independent of input reductions
- * the KV-pairs are here built in FUNCTIONAL reverse order
- * as this is how the list as a whole is constructed.
- */
- hp = HAlloc(BIF_P, (2 + 3) * elems);
- }
-
- orig_elems = elems;
-
/* First we look for the leaf to start at using the
path given. While doing so, we push each map node
and the index onto the stack to use later. */
for (i = 1; ; i++) {
- Eterm *ptr = hashmap_val(node),
- hdr = *ptr++;
- Uint sz;
+ Eterm hdr;
+
+ ptr = hashmap_val(node);
+ hdr = *ptr++;
sz = hashmap_node_size(hdr, &ptr);
- if (PATH_ELEM(curr_path) >= sz)
+ idx = PATH_ELEM(curr_path);
+ if (idx >= sz)
goto badarg;
- WSTACK_PUSH4(stack, node, PATH_ELEM(curr_path)+1, sz, (UWord)ptr);
-
- /* We have found a leaf, return it and the next X elements */
- if (is_list(ptr[PATH_ELEM(curr_path)])) {
- Eterm *lst = list_val(ptr[PATH_ELEM(curr_path)]);
- if (type == iterator) {
- res = make_tuple(hp);
- hp[0] = make_arityval(3);
- hp[1] = CAR(lst);
- hp[2] = CDR(lst);
- patch_ptr = &hp[3];
- hp += 4;
- } else {
- Eterm tup = TUPLE2(hp, CAR(lst), CDR(lst)); hp += 3;
- res = CONS(hp, tup, BIF_ARG_3); hp += 2;
- }
- elems--;
+ if (is_list(ptr[idx])) {
+ /* We have found a leaf, return it and the next X elements */
break;
}
- node = ptr[PATH_ELEM(curr_path)];
+ WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr);
+
+ node = ptr[idx];
curr_path >>= PATH_ELEM_SIZE;
@@ -3585,12 +3736,50 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) {
}
}
+ if (type == iterator) {
+ /*
+ * Iterator uses the format {K1, V1, {K2, V2, {K3, V3, [Path | Map]}}},
+ * so each element is 4 words large.
+ * To make iteration order independent of input reductions
+ * the KV-pairs are here built in DESTRUCTIVE non-reverse order.
+ */
+ words_per_elem = 4;
+ patch_ptr = &res;
+ } else {
+ /*
+ * List used the format [Path, Map, {K3,V3}, {K2,V2}, {K1,V1} | BIF_ARG_3],
+ * so each element is 2+3 words large.
+ * To make list order independent of input reductions
+ * the KV-pairs are here built in FUNCTIONAL reverse order
+ * as this is how the list as a whole is constructed.
+ */
+ words_per_elem = 2 + 3;
+ res = BIF_ARG_3;
+ }
+ hp = HAlloc(BIF_P, words_per_elem * elems);
+ hp_end = hp + words_per_elem * elems;
+
+ orig_elems = elems;
+
/* We traverse the hashmap and return at most `elems` elements */
while(1) {
- Eterm *ptr = (Eterm*)WSTACK_POP(stack);
- Uint sz = (Uint)WSTACK_POP(stack);
- Uint idx = (Uint)WSTACK_POP(stack);
- Eterm node = (Eterm)WSTACK_POP(stack);
+
+ if (idx == 0) {
+ if (elems < sz && is_arity_value(*hashmap_val(node))) {
+ /*
+ * This is a collision node!
+ * Make sure 'elems' is large enough not to yield in the
+ * middle of it. Collision nodes may be larger than 16
+ * and that would complicate the 4-bit path format.
+ */
+ elems = sz;
+ HRelease(BIF_P, hp_end, hp);
+ hp = HAlloc(BIF_P, words_per_elem * elems);
+ hp_end = hp + words_per_elem * elems;
+ }
+ }
+ else
+ ASSERT(!is_arity_value(*hashmap_val(node)));
while (idx < sz && elems != 0 && is_list(ptr[idx])) {
Eterm *lst = list_val(ptr[idx]);
@@ -3609,6 +3798,8 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) {
idx++;
}
+ ASSERT(idx == sz || !is_arity_value(*hashmap_val(node)));
+
if (elems == 0) {
if (idx < sz) {
/* There are more elements in this node to explore */
@@ -3627,26 +3818,29 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) {
}
}
break;
- } else {
- if (idx < sz) {
- Eterm hdr;
- /* Push next idx in current node */
- WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr);
-
- /* Push first idx in child node */
- node = ptr[idx];
- ptr = hashmap_val(ptr[idx]);
- hdr = *ptr++;
- sz = hashmap_node_size(hdr, &ptr);
- WSTACK_PUSH4(stack, node, 0, sz, (UWord)ptr);
- }
}
-
- /* There are no more element in the hashmap */
- if (WSTACK_ISEMPTY(stack)) {
+ else if (idx < sz) {
+ Eterm hdr;
+ /* Push next idx in current node */
+ WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr);
+
+ /* Continue with first idx in child node */
+ node = ptr[idx];
+ ptr = hashmap_val(ptr[idx]);
+ hdr = *ptr++;
+ sz = hashmap_node_size(hdr, &ptr);
+ idx = 0;
+ }
+ else if (!WSTACK_ISEMPTY(stack)) {
+ ptr = (Eterm*)WSTACK_POP(stack);
+ sz = (Uint)WSTACK_POP(stack);
+ idx = (Uint)WSTACK_POP(stack);
+ node = (Eterm)WSTACK_POP(stack);
+ }
+ else {
+ /* There are no more elements in the hashmap */
break;
}
-
}
if (!WSTACK_ISEMPTY(stack)) {
@@ -3705,24 +3899,16 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) {
res = CONS(hp, path, res); hp += 2;
}
} else {
- if (type == iterator) {
+ if (type == iterator)
*patch_ptr = am_none;
- HRelease(BIF_P, hp + 4 * elems, hp);
- } else {
- HRelease(BIF_P, hp + (2+3) * elems, hp);
- }
+ HRelease(BIF_P, hp_end, hp);
}
BIF_P->fcalls -= 4 * (orig_elems - elems);
DESTROY_WSTACK(stack);
BIF_RET(res);
badarg:
- if (type == iterator) {
- HRelease(BIF_P, hp + 4 * elems, hp);
- } else {
- HRelease(BIF_P, hp + (2+3) * elems, hp);
- }
- BIF_P->fcalls -= 4 * (orig_elems - elems);
+ ASSERT(hp == NULL);
DESTROY_WSTACK(stack);
BIF_ERROR(BIF_P, BADARG);
}
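The 4-bit iteration path used by erts_internal_map_next_3() above can be
decoded with plain shifts. A standalone sketch (assuming PATH_ELEM and
PATH_ELEM_SIZE behave as they are used in the hunks; this is not the real
BIF): path 0x210 selects slot 0 in the head node, then slot 1, then slot 2.
Since a slot index must fit in 4 bits, iteration never yields inside a
collision node, whose arity may exceed 16 in debug builds.

#include <stdio.h>

typedef unsigned long Uint;
#define PATH_ELEM_SIZE 4
#define PATH_ELEM(PATH) ((PATH) & ((1 << PATH_ELEM_SIZE) - 1))

int main(void)
{
    Uint path = 0x210;
    for (int lvl = 0; lvl < 3; lvl++) { /* three levels in this example */
        printf("level %d: slot %lu\n", lvl, (unsigned long)PATH_ELEM(path));
        path >>= PATH_ELEM_SIZE;        /* as in curr_path >>= PATH_ELEM_SIZE */
    }
    return 0;
}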
diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h
index b451bad7e1..9ecacb8aa0 100644
--- a/erts/emulator/beam/erl_map.h
+++ b/erts/emulator/beam/erl_map.h
@@ -56,17 +56,15 @@ typedef struct flatmap_s {
/* the head-node is a bitmap or array with an untagged size */
#define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size)
-#define hashmap_make_hash(Key) make_map_hash(Key, 0)
+#define hashmap_make_hash(Key) make_map_hash(Key)
#define hashmap_restore_hash(Lvl, Key) \
- (((Lvl) < 8) ? \
- hashmap_make_hash(Key) >> (4*(Lvl)) : \
- make_map_hash(Key, ((Lvl) >> 3)) >> (4 * ((Lvl) & 7)))
+ (ASSERT(Lvl < 8), \
+ hashmap_make_hash(Key) >> (4*(Lvl)))
#define hashmap_shift_hash(Hx, Lvl, Key) \
- (((++(Lvl)) & 7) ? \
- (Hx) >> 4 : \
- make_map_hash(Key, ((Lvl) >> 3)))
+ (++(Lvl), ASSERT(Lvl <= HAMT_MAX_LEVEL), /* we allow one level too much */\
+ (Hx) >> 4)
/* erl_term.h stuff */
#define flatmap_get_values(x) (((Eterm *)(x)) + sizeof(flatmap_t)/sizeof(Eterm))
@@ -147,7 +145,8 @@ typedef struct hashmap_head_s {
/* erl_map.h stuff */
-#define is_hashmap_header_head(x) ((MAP_HEADER_TYPE(x) & (0x2)))
+#define is_hashmap_header_head(x) (MAP_HEADER_TYPE(x) & (0x2))
+#define is_hashmap_header_node(x) (MAP_HEADER_TYPE(x) == 1)
#define MAKE_MAP_HEADER(Type,Arity,Val) \
(_make_header(((((Uint16)(Val)) << MAP_HEADER_ARITY_SZ) | (Arity)) << MAP_HEADER_TAG_SZ | (Type) , _TAG_HEADER_MAP))
@@ -164,12 +163,21 @@ typedef struct hashmap_head_s {
#define MAP_HEADER_HAMT_NODE_BITMAP(Bmp) \
MAKE_MAP_HEADER(MAP_HEADER_TAG_HAMT_NODE_BITMAP,0x0,Bmp)
+#define MAP_HEADER_HAMT_COLLISION_NODE(Arity) make_arityval(Arity)
+
#define MAP_HEADER_FLATMAP_SZ (sizeof(flatmap_t) / sizeof(Eterm))
#define HAMT_NODE_ARRAY_SZ (17)
#define HAMT_HEAD_ARRAY_SZ (18)
#define HAMT_NODE_BITMAP_SZ(n) (1 + n)
#define HAMT_HEAD_BITMAP_SZ(n) (2 + n)
+#define HAMT_COLLISION_NODE_SZ(n) (1 + n)
+/*
+ * Collision nodes are used when all hash bits have been exhausted.
+ * They are normal tuples of arity 2 or larger. The elements of a collision
+ * node tuple contain key-value cons cells like the other nodes,
+ * but they are sorted in map-key order.
+ */
/* 2 bits maps tag + 4 bits subtag + 2 ignore bits */
#define _HEADER_MAP_SUBTAG_MASK (0xfc)
@@ -183,11 +191,17 @@ typedef struct hashmap_head_s {
#define hashmap_index(hash) (((Uint32)hash) & 0xf)
+#define HAMT_MAX_LEVEL 8
+
/* hashmap heap size:
[one cons cell + one list term in parent node] per key
[one header + one boxed term in parent node] per inner node
[one header + one size word] for root node
Observed average number of nodes per key is about 0.35.
+
+ Amendment: This size estimation does not take collision nodes into account.
+ It should be good enough though, as collision nodes are rare
+ and only make the size smaller compared to unlimited HAMT depth.
*/
#define HASHMAP_WORDS_PER_KEY 3
#define HASHMAP_WORDS_PER_NODE 2
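A back-of-the-envelope check of the macros above: hashmap_index() consumes
4 bits per level, so a 32-bit hash is exhausted after HAMT_MAX_LEVEL (8)
levels, which is exactly where collision nodes take over. A standalone
sketch (not ERTS code):

#include <stdio.h>

typedef unsigned int Uint32;
#define HAMT_MAX_LEVEL 8
#define hashmap_index(hash) (((Uint32)(hash)) & 0xf)

int main(void)
{
    Uint32 hx = 0xdeadbeefu;   /* stand-in for hashmap_make_hash(Key) */
    for (int lvl = 0; lvl < HAMT_MAX_LEVEL; lvl++) {
        printf("level %d -> slot %2u\n", lvl, hashmap_index(hx));
        hx >>= 4;              /* what hashmap_shift_hash does, minus asserts */
    }
    /* all 32 bits are now spent: below this depth, equal-hash keys
     * must share a collision node */
    return 0;
}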
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index 070d6aef48..a4624783df 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -71,9 +71,15 @@ Sint erts_list_length(Eterm);
int erts_is_builtin(Eterm, Eterm, int);
Uint32 make_hash2(Eterm);
Uint32 trapping_make_hash2(Eterm, Eterm*, struct process*);
+#ifdef DEBUG
+# define DBG_HASHMAP_COLLISION_BONANZA
+#endif
+#ifdef DBG_HASHMAP_COLLISION_BONANZA
+Uint32 erts_dbg_hashmap_collision_bonanza(Uint32 hash, Eterm key);
+#endif
Uint32 make_hash(Eterm);
Uint32 make_internal_hash(Eterm, Uint32 salt);
-Uint32 make_map_hash(Eterm key, int level);
+Uint32 make_map_hash(Eterm key);
void erts_save_emu_args(int argc, char **argv);
Eterm erts_get_emu_args(struct process *c_p);
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 67584e38ec..cfac7a5290 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -3035,7 +3035,6 @@ dec_pid(ErtsDistExternal *edep, ErtsHeapFactory* factory, const byte* ep,
#define ENC_BIN_COPY ((Eterm) 3)
#define ENC_MAP_PAIR ((Eterm) 4)
#define ENC_HASHMAP_NODE ((Eterm) 5)
-#define ENC_STORE_MAP_ELEMENT ((Eterm) 6)
#define ENC_START_SORTING_MAP ((Eterm) 7)
#define ENC_CONTINUE_SORTING_MAP ((Eterm) 8)
#define ENC_PUSH_SORTED_MAP ((Eterm) 9)
@@ -3193,18 +3192,32 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep,
case ENC_HASHMAP_NODE:
if (is_list(obj)) { /* leaf node [K|V] */
ptr = list_val(obj);
+ if (dflags & DFLAG_DETERMINISTIC) {
+ *next_map_element++ = CAR(ptr);
+ *next_map_element++ = CDR(ptr);
+ goto outer_loop;
+ }
WSTACK_PUSH2(s, ENC_TERM, CDR(ptr));
obj = CAR(ptr);
}
- break;
- case ENC_STORE_MAP_ELEMENT: /* option `deterministic` */
- if (is_list(obj)) { /* leaf node [K|V] */
- ptr = list_val(obj);
- *next_map_element++ = CAR(ptr);
- *next_map_element++ = CDR(ptr);
+ else if (is_tuple(obj)) { /* collision node */
+ Uint tpl_sz;
+ ptr = tuple_val(obj);
+ tpl_sz = arityval(*ptr);
+ ASSERT(tpl_sz >= 2);
+ ptr++;
+ WSTACK_RESERVE(s, tpl_sz * 2);
+ while(tpl_sz--) {
+ ASSERT(is_list(*ptr));
+ WSTACK_FAST_PUSH(s, ENC_HASHMAP_NODE);
+ WSTACK_FAST_PUSH(s, *ptr++);
+ }
goto outer_loop;
- }
- break;
+ }
+ else
+ ASSERT((*boxed_val(obj) & _HEADER_MAP_SUBTAG_MASK)
+ == HAMT_SUBTAG_NODE_BITMAP);
+ break;
case ENC_START_SORTING_MAP: /* option `deterministic` */
{
long num_reductions = r;
@@ -3489,7 +3502,6 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep,
} else {
Eterm hdr;
Uint node_sz;
- Eterm node_processor;
ptr = boxed_val(obj);
hdr = *ptr;
ASSERT(is_header(hdr));
@@ -3530,17 +3542,14 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep,
node_sz = hashmap_bitcount(MAP_HEADER_VAL(hdr));
ASSERT(node_sz < 17);
break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
- }
-
+ default:
+ erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
+ }
ptr++;
- node_processor = (dflags & DFLAG_DETERMINISTIC) ?
- ENC_STORE_MAP_ELEMENT : ENC_HASHMAP_NODE;
WSTACK_RESERVE(s, node_sz*2);
while(node_sz--) {
- WSTACK_FAST_PUSH(s, node_processor);
- WSTACK_FAST_PUSH(s, *ptr++);
+ WSTACK_FAST_PUSH(s, ENC_HASHMAP_NODE);
+ WSTACK_FAST_PUSH(s, *ptr++);
}
}
break;
@@ -5312,8 +5321,8 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
node_sz = hashmap_bitcount(MAP_HEADER_VAL(hdr));
ASSERT(node_sz < 17);
break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
+ default:
+ erts_exit(ERTS_ERROR_EXIT, "bad header\r\n");
}
ptr++;
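A side note on the `deterministic` option handled above: leaf pairs are
collected into next_map_element and sorted rather than streamed, because the
HAMT's internal order follows hash values rather than key order. A toy sketch
of the collect-then-sort idea (plain ints standing in for terms; not the
encoder itself):

#include <stdio.h>
#include <stdlib.h>

static int cmp_key(const void *a, const void *b)
{
    const int *pa = a, *pb = b;
    return (pa[0] > pb[0]) - (pa[0] < pb[0]);
}

int main(void)
{
    /* {key, value} pairs in (arbitrary) tree order */
    int pairs[][2] = { {30, 3}, {10, 1}, {20, 2} };
    qsort(pairs, 3, sizeof pairs[0], cmp_key);
    for (int i = 0; i < 3; i++)     /* deterministic, key-sorted order */
        printf("%d => %d\n", pairs[i][0], pairs[i][1]);
    return 0;
}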
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index bf8b91e374..17c4bcb19c 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -1616,15 +1616,12 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_
* We therefore calculate context independent hashes for all
* key-value pairs and then xor them together.
*/
- ESTACK_PUSH(s, hash_xor_pairs);
- ESTACK_PUSH(s, hash);
- ESTACK_PUSH(s, HASH_MAP_TAIL);
+ ESTACK_PUSH3(s, hash_xor_pairs, hash, HASH_MAP_TAIL);
hash = 0;
hash_xor_pairs = 0;
for (ctx.i = ctx.size - 1; ctx.i >= 0; ctx.i--) {
- ESTACK_PUSH(s, HASH_MAP_PAIR);
- ESTACK_PUSH(s, ctx.vs[ctx.i]);
- ESTACK_PUSH(s, ctx.ks[ctx.i]);
+ ESTACK_PUSH3(s, HASH_MAP_PAIR,
+ ctx.vs[ctx.i], ctx.ks[ctx.i]);
TRAP_LOCATION(hamt_subtag_head_flatmap);
}
goto hash2_common;
@@ -1636,9 +1633,7 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_
UINT32_HASH(size, HCONST_16);
if (size == 0)
goto hash2_common;
- ESTACK_PUSH(s, hash_xor_pairs);
- ESTACK_PUSH(s, hash);
- ESTACK_PUSH(s, HASH_MAP_TAIL);
+ ESTACK_PUSH3(s, hash_xor_pairs, hash, HASH_MAP_TAIL);
hash = 0;
hash_xor_pairs = 0;
}
@@ -1656,13 +1651,22 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_
while (ctx.i) {
if (is_list(*ctx.ptr)) {
Eterm* cons = list_val(*ctx.ptr);
- ESTACK_PUSH(s, HASH_MAP_PAIR);
- ESTACK_PUSH(s, CDR(cons));
- ESTACK_PUSH(s, CAR(cons));
+ ESTACK_PUSH3(s, HASH_MAP_PAIR, CDR(cons), CAR(cons));
}
else {
ASSERT(is_boxed(*ctx.ptr));
- ESTACK_PUSH(s, *ctx.ptr);
+ if (is_tuple(*ctx.ptr)) { /* collision node */
+ Eterm *coll_ptr = tuple_val(*ctx.ptr);
+ Uint n = arityval(*coll_ptr);
+ ASSERT(n >= 2);
+ coll_ptr++;
+ for (; n; n--, coll_ptr++) {
+ Eterm* cons = list_val(*coll_ptr);
+ ESTACK_PUSH3(s, HASH_MAP_PAIR, CDR(cons), CAR(cons));
+ }
+ }
+ else
+ ESTACK_PUSH(s, *ctx.ptr);
}
ctx.i--; ctx.ptr++;
TRAP_LOCATION(map_subtag);
@@ -2047,15 +2051,43 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p)
return make_hash2_helper(term, 1, state_mref_write_back, p);
}
+#ifdef DBG_HASHMAP_COLLISION_BONANZA
+Uint32 erts_dbg_hashmap_collision_bonanza(Uint32 hash, Eterm key)
+{
+/*{
+ static Uint32 hashvec[7] = {
+ 0x02345678,
+ 0x12345678,
+ 0xe2345678,
+ 0xf2345678,
+ 0x12abcdef,
+ 0x13abcdef,
+ 0xcafebabe
+ };
+ hash = hashvec[hash % (sizeof(hashvec) / sizeof(hashvec[0]))];
+ }*/
+ const Uint32 bad_hash = (hash & 0x12482481) * 1442968193;
+ const Uint32 bad_bits = hash % 67;
+ if (bad_bits < 32) {
+ /* Mix in a number of high good bits to get "randomly" close
+ to the collision nodes */
+ const Uint32 bad_mask = (1 << bad_bits) - 1;
+ return (hash & ~bad_mask) | (bad_hash & bad_mask);
+ }
+ return bad_hash;
+}
+#endif
+
/* Term hash function for maps, with a separate depth parameter */
-Uint32 make_map_hash(Eterm key, int depth) {
+Uint32 make_map_hash(Eterm key) {
Uint32 hash = 0;
- if (depth > 0) {
- UINT32_HASH_2(depth, 1, HCONST_22);
- }
+ hash = make_internal_hash(key, hash);
- return make_internal_hash(key, hash);
+#ifdef DBG_HASHMAP_COLLISION_BONANZA
+ hash = erts_dbg_hashmap_collision_bonanza(hash, key);
+#endif
+ return hash;
}
/* Term hash function for internal use.
@@ -2068,12 +2100,10 @@ Uint32 make_map_hash(Eterm key, int depth) {
* with a new ErlNode struct, externals from that node will hash different than
* before.
*
- * One IMPORTANT property must hold (for hamt).
- * EVERY BIT of the term that is significant for equality (see EQ)
- * MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a
- * chance of hashing different when salted.
- *
- * This is why we cannot use cached hash values for atoms for example.
+ * The property "EVERY BIT of the term that is significant for equality
+ * MUST BE USED AS INPUT FOR THE HASH" is nice but no longer crucial for the
+ * hashmap implementation that now uses collision nodes at the bottom of
+ * the HAMT when all hash bits are exhausted.
*
*/
@@ -2219,6 +2249,8 @@ make_internal_hash(Eterm term, Uint32 salt)
}
else {
ASSERT(is_boxed(*ptr));
+ /* no special treatment of collision nodes needed,
+ hash them as the tuples they are */
ESTACK_PUSH(s, *ptr);
}
i--; ptr++;
@@ -3084,8 +3116,8 @@ tailrecur_ne:
sz = hashmap_bitcount(MAP_HEADER_VAL(hdr));
ASSERT(sz > 0 && sz < 17);
break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "Unknown hashmap subsubtag\n");
+ default:
+ erts_exit(ERTS_ERROR_EXIT, "bad header");
}
goto term_array;
}
@@ -3448,6 +3480,11 @@ tailrecur_ne:
A minimal key can only be candidate as tie-breaker if we
have passed that hash value in the other tree (which means
the key did not exist in the other tree).
+
+ Collision node amendment:
+ The leafs in collision nodes are sorted in map-key order.
+ If keys are different but hashes are equal we advance the
+ one lagging behind key-wise.
*/
sp = PSTACK_PUSH(hmap_stack);
@@ -3846,12 +3883,19 @@ pop_next:
sp = PSTACK_TOP(hmap_stack);
if (j) {
/* Key diff found, enter phase 2 */
- if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) {
+ int hash_cmp = hashmap_key_hash_cmp(sp->ap, sp->bp);
+ if (hash_cmp == 0) {
+ /* Hash collision. Collision nodes are sorted by map key
+ * order, so we advance the one with the lesser key */
+ hash_cmp = j;
+ }
+ if (hash_cmp < 0) {
sp->min_key = CAR(sp->ap);
sp->cmp_res = -1;
sp->ap = hashmap_iterator_next(&stack);
}
else {
+ ASSERT(hash_cmp > 0);
sp->min_key = CAR(sp->bp);
sp->cmp_res = 1;
sp->bp = hashmap_iterator_next(&b_stack);
@@ -3901,7 +3945,7 @@ pop_next:
sp->bp = hashmap_iterator_next(&b_stack);
if (!sp->ap) {
/* end of maps with identical keys */
- ASSERT(!sp->bp);
+ ASSERT(!sp->bp); /* as we assume identical map sizes */
j = sp->cmp_res;
exact = sp->was_exact;
(void) PSTACK_POP(hmap_stack);
@@ -3936,14 +3980,21 @@ pop_next:
/* fall through */
case_HASHMAP_PHASE2_NEXT_STEP:
if (sp->ap || sp->bp) {
- if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) {
+ int hash_cmp = hashmap_key_hash_cmp(sp->ap, sp->bp);
+ if (hash_cmp == 0) {
+ /* Hash collision. Collision nodes are sorted by map key
+ * order, so we advance the one with the lesser key */
+ hash_cmp = j;
+ }
+ if (hash_cmp < 0) {
ASSERT(sp->ap);
a = CAR(sp->ap);
b = sp->min_key;
ASSERT(exact);
WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE2_IS_MIN_KEY_A));
}
- else { /* hash_cmp > 0 */
+ else {
+ ASSERT(hash_cmp > 0);
ASSERT(sp->bp);
a = CAR(sp->bp);
b = sp->min_key;
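For experimentation, the debug-only hash mangling above can be lifted out
as a standalone replica (the Eterm key parameter is dropped here because the
live code path never reads it). For 35 of every 67 hash values the result
collapses onto a tiny subset of the 32-bit space, and otherwise only the
high bits keep their entropy, which is what getting "randomly" close to the
collision nodes amounts to:

#include <stdio.h>

typedef unsigned int Uint32;

static Uint32 bonanza(Uint32 hash)
{
    const Uint32 bad_hash = (hash & 0x12482481u) * 1442968193u;
    const Uint32 bad_bits = hash % 67;
    if (bad_bits < 32) {
        /* keep the high "good" bits, overwrite the low bad_bits bits */
        const Uint32 bad_mask = ((Uint32)1 << bad_bits) - 1;
        return (hash & ~bad_mask) | (bad_hash & bad_mask);
    }
    return bad_hash;   /* 35 times out of 67: almost no entropy left */
}

int main(void)
{
    /* nearby inputs now collide far more often than a good hash would */
    for (Uint32 h = 0; h < 8; h++)
        printf("%08x -> %08x\n", h, bonanza(h));
    return 0;
}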
diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl
index 5707a8e56c..9399303f54 100644
--- a/erts/emulator/test/map_SUITE.erl
+++ b/erts/emulator/test/map_SUITE.erl
@@ -256,6 +256,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) ->
60 = map_size(M0),
60 = maps:size(M0),
+ 60 = apply(erlang, id(map_size), [M0]),
+ 60 = apply(maps, id(size), [M0]),
% with repeating
M1 = id(#{ 10=>a0,20=>b0,30=>"c0","40"=>"d0",<<"50">>=>"e0",{["00"]}=>"10",
@@ -292,6 +294,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) ->
60 = map_size(M1),
60 = maps:size(M1),
+ 60 = apply(erlang, id(map_size), [M1]),
+ 60 = apply(maps, id(size), [M1]),
% with floats
@@ -345,6 +349,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) ->
90 = map_size(M2),
90 = maps:size(M2),
+ 90 = apply(erlang, id(map_size), [M2]),
+ 90 = apply(maps, id(size), [M2]),
% with bignums
M3 = id(#{ 10=>a0,20=>b0,30=>"c0","40"=>"d0",<<"50">>=>"e0",{["00"]}=>"10",
@@ -408,6 +414,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) ->
98 = map_size(M3),
98 = maps:size(M3),
+ 98 = apply(erlang, id(map_size), [M3]),
+ 98 = apply(maps, id(size), [M3]),
%% with maps
@@ -528,6 +536,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) ->
95 = map_size(M4),
95 = maps:size(M4),
+ 95 = apply(erlang, id(map_size), [M4]),
+ 95 = apply(maps, id(size), [M4]),
% call for value
@@ -625,6 +635,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) ->
95 = map_size(M5),
95 = maps:size(M5),
+ 95 = apply(erlang, id(map_size), [M5]),
+ 95 = apply(maps, id(size), [M5]),
%% remember
@@ -2046,6 +2058,10 @@ t_bif_map_merge(Config) when is_list(Config) ->
{'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} =
(catch maps:merge(T, #{})),
{'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} =
+ (catch maps:merge(M11, T)),
+ {'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} =
+ (catch maps:merge(T, M11)),
+ {'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} =
(catch maps:merge(T, T))
end),
ok.
@@ -2279,6 +2295,16 @@ t_bif_erlang_phash2() ->
70249457 = erlang:phash2(M0), % 118679416
59617982 = erlang:phash2(M1), % 51612236
70249457 = erlang:phash2(M2), % 118679416
+
+ M1000 = maps:from_list([{K,K} || K <- lists:seq(1,1000)]),
+ 66609305 = erlang:phash2(M1000),
+
+ Mnested1 = #{flatmap => M0, M0 => flatmap, hashmap => M1000, M1000 => hashmap},
+ 113689339 = erlang:phash2(Mnested1),
+
+ Mnested2 = maps:merge(Mnested1, M1000),
+ 29167443 = erlang:phash2(Mnested2),
+
ok.
t_bif_erlang_phash() ->
@@ -2299,6 +2325,16 @@ t_bif_erlang_phash() ->
2620391445 = erlang:phash(M0,Sz), % 3590546636
1670235874 = erlang:phash(M1,Sz), % 4066388227
2620391445 = erlang:phash(M2,Sz), % 3590546636
+
+ M1000 = maps:from_list([{K,K} || K <- lists:seq(1,1000)]),
+ 1945662653 = erlang:phash(M1000, Sz),
+
+ Mnested1 = #{flatmap => M0, M0 => flatmap, hashmap => M1000, M1000 => hashmap},
+ 113694495 = erlang:phash(Mnested1, Sz),
+
+ Mnested2 = maps:merge(Mnested1, M1000),
+ 431825783 = erlang:phash(Mnested2, Sz),
+
ok.
t_map_encode_decode(Config) when is_list(Config) ->
@@ -2774,25 +2810,31 @@ t_maps_without(_Config) ->
%% Verify that the number of nodes in hashmaps
%% of different types and sizes does not deviate too
%% much from the theoretical model.
+%% For debug with DBG_HASHMAP_COLLISION_BONANZA the test will expect
+%% the hashmaps to NOT be well balanced.
t_hashmap_balance(_Config) ->
+ erts_debug:set_internal_state(available_internal_state, true),
+ ExpectBalance = not erts_debug:get_internal_state(hashmap_collision_bonanza),
+ hashmap_balance(ExpectBalance),
+ erts_debug:set_internal_state(available_internal_state, false),
+ ok.
+
+hashmap_balance(EB) ->
io:format("Integer keys\n", []),
- hashmap_balance(fun(I) -> I end),
+ hashmap_balance(fun(I) -> I end, EB),
io:format("Float keys\n", []),
- hashmap_balance(fun(I) -> float(I) end),
+ hashmap_balance(fun(I) -> float(I) end, EB),
io:format("String keys\n", []),
- hashmap_balance(fun(I) -> integer_to_list(I) end),
+ hashmap_balance(fun(I) -> integer_to_list(I) end, EB),
io:format("Binary (big) keys\n", []),
- hashmap_balance(fun(I) -> <<I:16/big>> end),
+ hashmap_balance(fun(I) -> <<I:16/big>> end, EB),
io:format("Binary (little) keys\n", []),
- hashmap_balance(fun(I) -> <<I:16/little>> end),
+ hashmap_balance(fun(I) -> <<I:16/little>> end, EB),
io:format("Atom keys\n", []),
- erts_debug:set_internal_state(available_internal_state, true),
- hashmap_balance(fun(I) -> erts_debug:get_internal_state({atom,I}) end),
- erts_debug:set_internal_state(available_internal_state, false),
-
+ hashmap_balance(fun(I) -> erts_debug:get_internal_state({atom,I}) end, EB),
ok.
-hashmap_balance(KeyFun) ->
+hashmap_balance(KeyFun, ExpectBalance) ->
%% For uniformly distributed hash values, the average number of nodes N
%% in a hashmap varies between 0.3*K and 0.4*K where K is number of keys.
%% The standard deviation of N is about sqrt(K)/3.
@@ -2836,9 +2878,10 @@ hashmap_balance(KeyFun) ->
erts_debug:flat_size(MaxMap)])
end,
- true = (MaxDiff < 6), % The probability of this line failing is about 0.000000001
- % for a uniform hash. I've set the probability this "high" for now
- % to detect flaws in our make_internal_hash.
+ %% The probability of this line failing is about 0.000000001
+ %% for a uniform hash. I've set the probability this "high" for now
+ %% to detect flaws in our make_internal_hash.
+ ExpectBalance = (MaxDiff < 6),
ok.
hashmap_nodes(M) ->
@@ -2847,6 +2890,7 @@ hashmap_nodes(M) ->
case element(1,Tpl) of
bitmaps -> Acc + element(2,Tpl);
arrays -> Acc + element(2,Tpl);
+ collisions -> Acc + element(2,Tpl);
_ -> Acc
end
end,