diff options
author | Robin Morisset <rmorisset@fb.com> | 2022-09-01 03:13:18 -0700 |
---|---|---|
committer | Sverker Eriksson <sverker@erlang.org> | 2023-02-27 19:37:10 +0100 |
commit | 04625c7082ae25d81cff663c6b16b8e92906e319 (patch) | |
tree | 741a00d9fe1dbdd2ba21e0ef120393f1f4b83d61 /lib/stdlib/test | |
parent | 269ca9e6de10597ed7ec5ae335f14f190b6d0315 (diff) | |
download | erlang-04625c7082ae25d81cff663c6b16b8e92906e319.tar.gz |
Optimize the copy that occurs in ets:lookup_element
Michal Muskala pointed out to me that sometimes ets:lookup_element can be
slower than ets:lookup, even when the value is found. For example:
```
erlperf "ets:lookup(ac_tab, {env, kernel, logger})." "ets:lookup_element(ac_tab, {env, kernel, logger}, 2)."
Code || QPS Time Rel
ets:lookup(ac_tab, {env, kernel, logger}). 1 2859 Ki 349 ns 100%
ets:lookup_element(ac_tab, {env, kernel, logger}, 2). 1 2164 Ki 462 ns 75%
```
The reason for this is that lookup can find the size of the allocation
needed directly in the DbTerm, then copy the whole thing efficiently
with copy_shallow_x, whereas lookup_element must first walk over the
entire value to be copied in size_object_x, and then walk it again in
copy_struct_x.
This patch fixes this in three steps:
- Make db_store_term (used by ets:insert among others) use a
specialized version of copy_struct that lays the fields out in
order (instead of being chaotically interspersed as usual)
- Finding the size of the needed allocation can now be done
efficiently (see db_field_size)
- And we can use copy_shallow_x for the copy like lookup (just with a
small change so that it does not assume that it is copying a tuple).
Result:
```
PATH=~/otp/bin:$PATH erlperf "ets:lookup(ac_tab, {env, kernel, logger})." "ets:lookup_element(ac_tab, {env, kernel, logger}, 2)."
Code || QPS Time Rel
ets:lookup_element(ac_tab, {env, kernel, logger}, 2). 1 2945 Ki 339 ns 100%
ets:lookup(ac_tab, {env, kernel, logger}). 1 2860 Ki 349 ns 97%
```
I've tried measuring the potential impact on ets:insert performance
by looking at the 2 throughput benchmarks in ets_SUITE.
More specifically, I looked at the 50% insert/50% delete mix
(since it maximizes the impact of insert) and at the 1 process case
(since my change should not affect concurrency in any way).
Here are the results for the long benchmark, with 3 runs in each
configuration, listing all three kinds of table tested:
[set, {write_concurrency, auto}, {read_concurrency, true}]:
Baseline: 3.55 / 3.31 / 2.85
This patch: 3.20 / 3.08 / 3.44
[set, {write_concurrency, true}, {read_concurrency, true}]:
Baseline: 3.31 / 3.11 / 2.78
This patch: 3.13 / 3.08 / 3.42
[ordered_set, {write_concurrency, true}, {read_concurrency, true}]:
Baseline: 1.51 / 1.56 / 1.17
This patch: 1.50 / 1.37 / 1.59
(all running on my M1 MBP)
There is no obvious performance regression, but the noise is
sufficiently high that I may have missed some.
Co-authored-by: Sverker Eriksson <sverker@erlang.org>
Diffstat (limited to 'lib/stdlib/test')
-rw-r--r-- | lib/stdlib/test/ets_SUITE.erl | 21 |
1 files changed, 16 insertions, 5 deletions
diff --git a/lib/stdlib/test/ets_SUITE.erl b/lib/stdlib/test/ets_SUITE.erl index 64009a5273..605f301d05 100644 --- a/lib/stdlib/test/ets_SUITE.erl +++ b/lib/stdlib/test/ets_SUITE.erl @@ -2301,13 +2301,19 @@ update_element_do(Tab,Tuple,Key,UpdPos) -> Big32 = 16#12345678, Big64 = 16#123456789abcdef0, - Values = { 623, -27, 0, Big32, -Big32, Big64, -Big64, Big32*Big32, + RefcBin = list_to_binary(lists:seq(1,100)), + BigMap1 = maps:from_list([{N,N} || N <- lists:seq(1,33)]), + BigMap2 = BigMap1#{key => RefcBin, RefcBin => value}, + Values = { 623, -27, Big32, -Big32, Big64, -Big64, Big32*Big32, -Big32*Big32, Big32*Big64, -Big32*Big64, Big64*Big64, -Big64*Big64, "A", "Sverker", [], {12,-132}, {}, - <<45,232,0,12,133>>, <<234,12,23>>, list_to_binary(lists:seq(1,100)), + <<45,232,0,12,133>>, <<234,12,23>>, RefcBin, (fun(X) -> X*Big32 end), - make_ref(), make_ref(), self(), ok, update_element, 28, 29 }, - Length = size(Values), + make_ref(), make_ref(), self(), ok, update_element, + #{a => value, "hello" => "world", 1.0 => RefcBin }, + BigMap1, BigMap2}, + Length = tuple_size(Values), + 29 = Length, PosValArgF = fun(ToIx, ResList, [Pos | PosTail], Rand, MeF) -> NextIx = (ToIx+Rand) rem Length, @@ -2327,7 +2333,12 @@ update_element_do(Tab,Tuple,Key,UpdPos) -> true = ets:update_element(Tab, Key, PosValArg), ArgHash = erlang:phash2({Tab,Key,PosValArg}), NewTuple = update_tuple(PosValArg,Tuple), - [NewTuple] = ets:lookup(Tab,Key) + [NewTuple] = ets:lookup(Tab,Key), + [begin + Elem = element(I, NewTuple), + Elem = ets:lookup_element(Tab, Key, I) + end + || I <- lists:seq(1, tuple_size(NewTuple))] end, LoopF = fun(_FromIx, Incr, _Times, Checksum, _MeF) when Incr >= Length -> |