diff options
author | Robin Morisset <rmorisset@fb.com> | 2022-09-01 03:13:18 -0700 |
---|---|---|
committer | Sverker Eriksson <sverker@erlang.org> | 2023-02-27 19:37:10 +0100 |
commit | 04625c7082ae25d81cff663c6b16b8e92906e319 (patch) | |
tree | 741a00d9fe1dbdd2ba21e0ef120393f1f4b83d61 /lib/stdlib/test | |
parent | 269ca9e6de10597ed7ec5ae335f14f190b6d0315 (diff) | |
download | erlang-04625c7082ae25d81cff663c6b16b8e92906e319.tar.gz |
Optimize the copy that occurs in ets:lookup_element
Michal Muskala pointed out to me that sometimes ets:lookup_element can be
slower than ets:lookup, even when the value is found. For example:
```
erlperf "ets:lookup(ac_tab, {env, kernel, logger})." "ets:lookup_element(ac_tab, {env, kernel, logger}, 2)."
Code || QPS Time Rel
ets:lookup(ac_tab, {env, kernel, logger}). 1 2859 Ki 349 ns 100%
ets:lookup_element(ac_tab, {env, kernel, logger}, 2). 1 2164 Ki 462 ns 75%
```
The reason for this is that lookup can find the size of the allocation
needed directly in the DbTerm, then copy the whole thing efficiently
with copy_shallow_x, whereas lookup_element must first walk over the
entire value to be copied in size_object_x, and then walk it again in
copy_struct_x.
This patch fixes this in three steps:
- Make db_store_term (used by ets:insert among others) use a
specialized version of copy_struct that lays the fields out in
order (instead of being chaotically interspersed as usual)
- Finding the size of the needed allocation can now be done
efficiently (see db_field_size)
- And we can use copy_shallow_x for the copy like lookup (just with a
small change so that it does not assume that it is copying a tuple).
Result:
```
PATH=~/otp/bin:$PATH erlperf "ets:lookup(ac_tab, {env, kernel, logger})." "ets:lookup_element(ac_tab, {env, kernel, logger}, 2)."
Code || QPS Time Rel
ets:lookup_element(ac_tab, {env, kernel, logger}, 2). 1 2945 Ki 339 ns 100%
ets:lookup(ac_tab, {env, kernel, logger}). 1 2860 Ki 349 ns 97%
```
I've tried measuring the potential impact on ets:insert performance
by looking at the 2 throughput benchmarks in ets_SUITE.
More specifically, I looked at the 50% insert/50% delete mix
(since it maximizes the impact of insert) and at the 1 process case
(since my change should not affect concurrency in any way).
Here are the results for the long benchmark, with 3 runs in each
configuration, listing all three kinds of table tested:
[set, {write_concurrency, auto}, {read_concurrency, true}]:
Baseline: 3.55 / 3.31 / 2.85
This patch: 3.20 / 3.08 / 3.44
[set, {write_concurrency, true}, {read_concurrency, true}]:
Baseline: 3.31 / 3.11 / 2.78
This patch: 3.13 / 3.08 / 3.42
[ordered_set, {write_concurrency, true}, {read_concurrency, true}]:
Baseline: 1.51 / 1.56 / 1.17
This patch: 1.50 / 1.37 / 1.59
(all running on my M1 MBP)
There is no obvious performance regression, but the noise is
sufficiently high that I may have missed some.
Co-authored-by: Sverker Eriksson <sverker@erlang.org>
Diffstat (limited to 'lib/stdlib/test')
-rw-r--r-- | lib/stdlib/test/ets_SUITE.erl | 21 |
1 files changed, 16 insertions, 5 deletions
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl index 64009a5273..605f301d05 100644 --- a/lib/stdlib/test/ets_SUITE.erl +++ b/lib/stdlib/test/ets_SUITE.erl @@ -2301,13 +2301,19 @@ update_element_do(Tab,Tuple,Key,UpdPos) -> Big32 = 16#12345678, Big64 = 16#123456789abcdef0, - Values = { 623, -27, 0, Big32, -Big32, Big64, -Big64, Big32*Big32, + RefcBin = list_to_binary(lists:seq(1,100)), + BigMap1 = maps:from_list([{N,N} || N <- lists:seq(1,33)]), + BigMap2 = BigMap1#{key => RefcBin, RefcBin => value}, + Values = { 623, -27, Big32, -Big32, Big64, -Big64, Big32*Big32, -Big32*Big32, Big32*Big64, -Big32*Big64, Big64*Big64, -Big64*Big64, "A", "Sverker", [], {12,-132}, {}, - <<45,232,0,12,133>>, <<234,12,23>>, list_to_binary(lists:seq(1,100)), + <<45,232,0,12,133>>, <<234,12,23>>, RefcBin, (fun(X) -> X*Big32 end), - make_ref(), make_ref(), self(), ok, update_element, 28, 29 }, - Length = size(Values), + make_ref(), make_ref(), self(), ok, update_element, + #{a => value, "hello" => "world", 1.0 => RefcBin }, + BigMap1, BigMap2}, + Length = tuple_size(Values), + 29 = Length, PosValArgF = fun(ToIx, ResList, [Pos | PosTail], Rand, MeF) -> NextIx = (ToIx+Rand) rem Length, @@ -2327,7 +2333,12 @@ update_element_do(Tab,Tuple,Key,UpdPos) -> true = ets:update_element(Tab, Key, PosValArg), ArgHash = erlang:phash2({Tab,Key,PosValArg}), NewTuple = update_tuple(PosValArg,Tuple), - [NewTuple] = ets:lookup(Tab,Key) + [NewTuple] = ets:lookup(Tab,Key), + [begin + Elem = element(I, NewTuple), + Elem = ets:lookup_element(Tab, Key, I) + end + || I <- lists:seq(1, tuple_size(NewTuple))] end, LoopF = fun(_FromIx, Incr, _Times, Checksum, _MeF) when Incr >= Length -> |