summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSverker Eriksson <sverker@erlang.org>2022-11-29 00:37:02 +0100
committerSverker Eriksson <sverker@erlang.org>2022-11-30 18:28:35 +0100
commit4e7e7721df0e5144e9e4bd12422b6bc615c526a9 (patch)
tree5fd7553672d9be76207fd8c6389b61e637b4e4b5
parent3002f55f409f44122d3a45ac53bfd453e9aa2cb2 (diff)
downloaderlang-4e7e7721df0e5144e9e4bd12422b6bc615c526a9.tar.gz
erts: Fix bug decoding unsorted flatmaps as keys in hashmap
"binary_to_term" of a hashmap (> 32 keys) with unsorted flatmap(s) (<= 32 keys) as key(s) inside the hashmap. The map term would be created but the map keys could not be found with maps:get and friends. Other operations such as map compare and merge could probably also give incorrect results. This was only be a problem if the term was encoded outside the VM by erl_interface, jinterface or otherwise, as the VM itself always encodes flatmaps with sorted keys.
-rw-r--r--erts/emulator/beam/external.c120
-rw-r--r--erts/emulator/test/binary_SUITE.erl32
2 files changed, 93 insertions, 59 deletions
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 1d1c473ab9..c2ec1730ee 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -1610,8 +1610,7 @@ typedef struct {
ErtsHeapFactory factory;
int remaining_n;
char* remaining_bytes;
- ErtsWStack flat_maps;
- ErtsPStack hamt_array;
+ ErtsPStack map_array;
} B2TDecodeContext;
typedef struct {
@@ -1810,9 +1809,9 @@ static void b2t_destroy_context(B2TContext* context)
case B2TDecodeTuple:
case B2TDecodeString:
case B2TDecodeBinary:
- if (context->u.dc.hamt_array.pstart) {
- erts_free(context->u.dc.hamt_array.alloc_type,
- context->u.dc.hamt_array.pstart);
+ if (context->u.dc.map_array.pstart) {
+ erts_free(context->u.dc.map_array.alloc_type,
+ context->u.dc.map_array.pstart);
}
break;
default:;
@@ -1967,8 +1966,7 @@ static BIF_RETTYPE binary_to_term_int(Process* p, Eterm bin, B2TContext *ctx)
ctx->u.dc.res = (Eterm) (UWord) NULL;
ctx->u.dc.next = &ctx->u.dc.res;
erts_factory_proc_prealloc_init(&ctx->u.dc.factory, p, ctx->heap_size);
- ctx->u.dc.flat_maps.wstart = NULL;
- ctx->u.dc.hamt_array.pstart = NULL;
+ ctx->u.dc.map_array.pstart = NULL;
ctx->state = B2TDecode;
/*fall through*/
case B2TDecode:
@@ -3913,11 +3911,14 @@ is_external_string(Eterm list, Uint* lenp)
}
-struct dec_term_hamt
+struct dec_term_map
{
- Eterm* objp; /* write result here */
- Uint size; /* nr of leafs */
- Eterm* leaf_array;
+ Eterm* objp; /* hashmap: write result here, flatmap: NULL */
+ Uint size; /* hashmap: nr of leafs, flatmap: unused */
+ union {
+ Eterm* leaf_array; /* hashmap */
+ flatmap_t* flatmap;
+ } u;
};
@@ -3932,12 +3933,11 @@ dec_term(ErtsDistExternal *edep,
B2TContext* ctx,
int ets_decode)
{
-#define PSTACK_TYPE struct dec_term_hamt
- PSTACK_DECLARE(hamt_array, 5);
+#define PSTACK_TYPE struct dec_term_map
+ PSTACK_DECLARE(map_array, 10);
int n;
ErtsAtomEncoding char_enc;
register Eterm* hp; /* Please don't take the address of hp */
- DECLARE_WSTACK(flat_maps); /* for preprocessing of small maps */
Eterm* next;
SWord reds;
#ifdef DEBUG
@@ -4023,13 +4023,9 @@ dec_term(ErtsDistExternal *edep,
return NULL;
}
}
- PSTACK_CHANGE_ALLOCATOR(hamt_array, ERTS_ALC_T_SAVED_ESTACK);
- WSTACK_CHANGE_ALLOCATOR(flat_maps, ERTS_ALC_T_SAVED_ESTACK);
- if (ctx->u.dc.hamt_array.pstart) {
- PSTACK_RESTORE(hamt_array, &ctx->u.dc.hamt_array);
- }
- if (ctx->u.dc.flat_maps.wstart) {
- WSTACK_RESTORE(flat_maps, &ctx->u.dc.flat_maps);
+ PSTACK_CHANGE_ALLOCATOR(map_array, ERTS_ALC_T_SAVED_ESTACK);
+ if (ctx->u.dc.map_array.pstart) {
+ PSTACK_RESTORE(map_array, &ctx->u.dc.map_array);
}
}
else {
@@ -4631,6 +4627,7 @@ dec_term_atom_common:
Uint32 size,n;
Eterm *kptr,*vptr;
Eterm keys;
+ struct dec_term_map* map = PSTACK_PUSH(map_array);
size = get_int32(ep); ep += 4;
@@ -4651,7 +4648,9 @@ dec_term_atom_common:
* vptr, last word for values
*/
- WSTACK_PUSH(flat_maps, (UWord)mp);
+ map->objp = NULL;
+ map->u.flatmap = mp;
+
mp->thing_word = MAP_HEADER_FLATMAP;
mp->size = size;
mp->keys = keys;
@@ -4666,11 +4665,10 @@ dec_term_atom_common:
}
}
else { /* Make hamt */
- struct dec_term_hamt* hamt = PSTACK_PUSH(hamt_array);
-
- hamt->objp = objp;
- hamt->size = size;
- hamt->leaf_array = hp;
+ ASSERT(objp != NULL);
+ map->objp = objp;
+ map->size = size;
+ map->u.leaf_array = hp;
for (n = size; n; n--) {
CDR(hp) = (Eterm) next;
@@ -4836,11 +4834,8 @@ dec_term_atom_common:
ctx->u.dc.ep = ep;
ctx->u.dc.next = next;
ctx->u.dc.factory.hp = hp;
- if (!WSTACK_ISEMPTY(flat_maps)) {
- WSTACK_SAVE(flat_maps, &ctx->u.dc.flat_maps);
- }
- if (!PSTACK_IS_EMPTY(hamt_array)) {
- PSTACK_SAVE(hamt_array, &ctx->u.dc.hamt_array);
+ if (!PSTACK_IS_EMPTY(map_array)) {
+ PSTACK_SAVE(map_array, &ctx->u.dc.map_array);
}
ctx->reds = 0;
return NULL;
@@ -4857,34 +4852,43 @@ dec_term_atom_common:
factory->hp = hp;
/*
* From here on factory may produce (more) heap fragments
+ * and we don't use local variable 'hp' anymore.
*/
- if (!PSTACK_IS_EMPTY(hamt_array)) {
- do {
- struct dec_term_hamt* hamt = PSTACK_TOP(hamt_array);
-
- *hamt->objp = erts_hashmap_from_array(factory,
- hamt->leaf_array,
- hamt->size,
- 1);
- if (is_non_value(*hamt->objp))
- goto error_hamt;
-
- (void) PSTACK_POP(hamt_array);
- } while (!PSTACK_IS_EMPTY(hamt_array));
- PSTACK_DESTROY(hamt_array);
- }
-
- /* Iterate through all the (flat)maps and check for validity and sort keys
- * - done here for when we know it is complete.
+ /*
+ * Iterate through all the maps and for
+ * + hashmaps: hash keys and generate all inner hamt nodes
+ * + flatmaps: check for duplicate keys and sort keys if needed
+ *
+ * We do this at the end because the size of the preallocated heap is only
+ * guaranteed to include everything except hamt nodes. If unlucky with hash
+ * collisions the factory may need to create new heap fragments.
+ *
+ * As maps may include each other as keys, it's important the iteration
+ * below is done bottom-up. Sub maps are completed before potential
+ * container maps.
*/
+ if (!PSTACK_IS_EMPTY(map_array)) {
+ do {
+ struct dec_term_map* map = PSTACK_TOP(map_array);
+
+ if (map->objp) {
+ *map->objp = erts_hashmap_from_array(factory,
+ map->u.leaf_array,
+ map->size,
+ 1);
+ if (is_non_value(*map->objp))
+ goto error_map_fixup;
+ }
+ else {
+ if (!erts_validate_and_sort_flatmap(map->u.flatmap))
+ goto error_map_fixup;
+ }
- while(!WSTACK_ISEMPTY(flat_maps)) {
- next = (Eterm *)WSTACK_POP(flat_maps);
- if (!erts_validate_and_sort_flatmap((flatmap_t*)next))
- goto error;
+ (void) PSTACK_POP(map_array);
+ } while (!PSTACK_IS_EMPTY(map_array));
+ PSTACK_DESTROY(map_array);
}
- WSTACK_DESTROY(flat_maps);
ASSERT((Eterm*)*dbg_resultp != NULL);
@@ -4908,15 +4912,13 @@ error:
}
else ASSERT(!factory->hp || factory->hp == hp);
-error_hamt:
+error_map_fixup:
erts_factory_undo(factory);
- PSTACK_DESTROY(hamt_array);
+ PSTACK_DESTROY(map_array);
if (ctx) {
ctx->state = B2TDecodeFail;
ctx->reds = reds;
}
- WSTACK_DESTROY(flat_maps);
-
return NULL;
}
diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl
index 49d2dfb983..2569219f53 100644
--- a/erts/emulator/test/binary_SUITE.erl
+++ b/erts/emulator/test/binary_SUITE.erl
@@ -62,6 +62,7 @@
t_hash/1,
sub_bin_copy/1,
bad_size/1,
+ unsorted_map_in_map/1,
bad_term_to_binary/1,
bad_binary_to_term_2/1,safe_binary_to_term2/1,
bad_binary_to_term/1, bad_terms/1, more_bad_terms/1,
@@ -95,6 +96,7 @@ all() ->
bad_binary_to_term, bad_terms, t_hash, bad_size,
sub_bin_copy, bad_term_to_binary, t2b_system_limit,
term_to_iovec, more_bad_terms,
+ unsorted_map_in_map,
otp_5484, otp_5933,
ordering, unaligned_order, gc_test,
bit_sized_binary_sizes, otp_6817, otp_8117, deep,
@@ -936,6 +938,36 @@ bad_bin_to_term(BadBin) ->
bad_bin_to_term(BadBin,Opts) ->
{'EXIT',{badarg,_}} = (catch binary_to_term_stress(BadBin,Opts)).
+-define(MAP_EXT, 116).
+-define(SMALL_INTEGER_EXT, 97).
+-define(NIL, 106).
+-define(MAP_SMALL_MAP_LIMIT, 32).
+
+%% OTP-18343: Decode unsorted flatmap as key in hashmap
+unsorted_map_in_map(Config) when is_list(Config) ->
+ K1 = 1,
+ K2 = 2,
+ true = K1 < K2,
+ FMap = #{K1 => [], K2 => []},
+ FMapBin = <<?MAP_EXT, 2:32,
+ %% unsorted list of key/value pairs
+ ?SMALL_INTEGER_EXT, K2, ?NIL,
+ ?SMALL_INTEGER_EXT, K1, ?NIL>>,
+ FMap = binary_to_term(<<131, FMapBin/binary>>),
+
+ HKeys = lists:seq(1, ?MAP_SMALL_MAP_LIMIT+1),
+ HMap0 = maps:from_list([{K,[]} || K <- HKeys]),
+ HMap0Bin = term_to_binary(HMap0),
+
+ %% Replace last key/value pair with FMap => []
+ Prologue = binary:part(HMap0Bin, 0, byte_size(HMap0Bin)-3),
+ HMap1Bin = <<Prologue/binary, FMapBin/binary, ?NIL>>,
+ HMap1 = binary_to_term(HMap1Bin),
+
+ %% Moment of truth, can we lookup key FMap
+ [] = maps:get(FMap, HMap1),
+ ok.
+
%% Test safety options for binary_to_term/2
safe_binary_to_term2(Config) when is_list(Config) ->
bad_bin_to_term(<<131,100,0,14,"undefined_atom">>, [safe]),