summaryrefslogtreecommitdiff
path: root/lib/stdlib/test
diff options
context:
space:
mode:
authorRobin Morisset <rmorisset@fb.com>2022-09-01 03:13:18 -0700
committerSverker Eriksson <sverker@erlang.org>2023-02-27 19:37:10 +0100
commit04625c7082ae25d81cff663c6b16b8e92906e319 (patch)
tree741a00d9fe1dbdd2ba21e0ef120393f1f4b83d61 /lib/stdlib/test
parent269ca9e6de10597ed7ec5ae335f14f190b6d0315 (diff)
downloaderlang-04625c7082ae25d81cff663c6b16b8e92906e319.tar.gz
Optimize the copy that occurs in ets:lookup_element
Michal Muskala pointed out to me that sometimes ets:lookup_element can be slower than ets:lookup, even when the value is found. For example: ``` erlperf "ets:lookup(ac_tab, {env, kernel, logger})." "ets:lookup_element(ac_tab, {env, kernel, logger}, 2)." Code || QPS Time Rel ets:lookup(ac_tab, {env, kernel, logger}). 1 2859 Ki 349 ns 100% ets:lookup_element(ac_tab, {env, kernel, logger}, 2). 1 2164 Ki 462 ns 75% ``` The reason for this is that lookup can find the size of the allocation needed directly in the DbTerm, then copy the whole thing efficiently with copy_shallow_x, whereas lookup_element must first walk all over the value to be copied in size_object_x, and then walk it again in copy_struct_x. This patch fixes this in three steps: - Make db_store_term (used by ets:insert among others) use a specialized version of copy_struct that makes the fields be laid out in order (instead of being chaotically interspersed as usual) - Finding the size of the needed allocation can now be done efficiently (see db_field_size) - And we can use copy_shallow_x for the copy like lookup (just with a small change so that it does not assume that it is copying a tuple). Result: ``` PATH=~/otp/bin:$PATH erlperf "ets:lookup(ac_tab, {env, kernel, logger})." "ets:lookup_element(ac_tab, {env, kernel, logger}, 2)." Code || QPS Time Rel ets:lookup_element(ac_tab, {env, kernel, logger}, 2). 1 2945 Ki 339 ns 100% ets:lookup(ac_tab, {env, kernel, logger}). 1 2860 Ki 349 ns 97% ``` I've tried measuring the potential impact on ets:insert performance by looking at the 2 throughput benchmarks in ets_SUITE. More specifically, I looked at the 50% insert/50% delete mix (since it maximizes the impact of insert) and at the 1 process case (since my change should not affect concurrency in any way). 
Here are the results for the long benchmark, with 3 runs in each configuration, listing all three kinds of table tested: [set, {write_concurrency, auto}, {read_concurrency, true}]: Baseline: 3.55 / 3.31 / 2.85 This patch: 3.20 / 3.08 / 3.44 [set, {write_concurrency, true}, {read_concurrency, true}]: Baseline: 3.31 / 3.11 / 2.78 This patch: 3.13 / 3.08 / 3.42 [ordered_set, {write_concurrency, true}, {read_concurrency, true}]: Baseline: 1.51 / 1.56 / 1.17 This patch: 1.50 / 1.37 / 1.59 (all running on my M1 MBP) There is no obvious performance regression, but the noise is sufficiently high that I may have missed some. Co-authored-by: Sverker Eriksson <sverker@erlang.org>
Diffstat (limited to 'lib/stdlib/test')
-rw-r--r--lib/stdlib/test/ets_SUITE.erl21
1 files changed, 16 insertions, 5 deletions
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl
index 64009a5273..605f301d05 100644
--- a/lib/stdlib/test/ets_SUITE.erl
+++ b/lib/stdlib/test/ets_SUITE.erl
@@ -2301,13 +2301,19 @@ update_element_do(Tab,Tuple,Key,UpdPos) ->
Big32 = 16#12345678,
Big64 = 16#123456789abcdef0,
- Values = { 623, -27, 0, Big32, -Big32, Big64, -Big64, Big32*Big32,
+ RefcBin = list_to_binary(lists:seq(1,100)),
+ BigMap1 = maps:from_list([{N,N} || N <- lists:seq(1,33)]),
+ BigMap2 = BigMap1#{key => RefcBin, RefcBin => value},
+ Values = { 623, -27, Big32, -Big32, Big64, -Big64, Big32*Big32,
-Big32*Big32, Big32*Big64, -Big32*Big64, Big64*Big64, -Big64*Big64,
"A", "Sverker", [], {12,-132}, {},
- <<45,232,0,12,133>>, <<234,12,23>>, list_to_binary(lists:seq(1,100)),
+ <<45,232,0,12,133>>, <<234,12,23>>, RefcBin,
(fun(X) -> X*Big32 end),
- make_ref(), make_ref(), self(), ok, update_element, 28, 29 },
- Length = size(Values),
+ make_ref(), make_ref(), self(), ok, update_element,
+ #{a => value, "hello" => "world", 1.0 => RefcBin },
+ BigMap1, BigMap2},
+ Length = tuple_size(Values),
+ 29 = Length,
PosValArgF = fun(ToIx, ResList, [Pos | PosTail], Rand, MeF) ->
NextIx = (ToIx+Rand) rem Length,
@@ -2327,7 +2333,12 @@ update_element_do(Tab,Tuple,Key,UpdPos) ->
true = ets:update_element(Tab, Key, PosValArg),
ArgHash = erlang:phash2({Tab,Key,PosValArg}),
NewTuple = update_tuple(PosValArg,Tuple),
- [NewTuple] = ets:lookup(Tab,Key)
+ [NewTuple] = ets:lookup(Tab,Key),
+ [begin
+ Elem = element(I, NewTuple),
+ Elem = ets:lookup_element(Tab, Key, I)
+ end
+ || I <- lists:seq(1, tuple_size(NewTuple))]
end,
LoopF = fun(_FromIx, Incr, _Times, Checksum, _MeF) when Incr >= Length ->