MDEV-20646: 10.3.18 is slower than 10.3.17

Fix incorrect change introduced in the fix for MDEV-20109. The patch tried to compute a more precise estimate for the record_count value in SJ-Materialization-Scan strategy (in Sj_materialization_picker::check_qep). However the new formula is worse as it produces extremely optimistic results in common cases where SJ-Materialization-Scan should be used) The old formula produces pessimistic results in cases when Sj-Materialization- Scan is unlikely to be a good choice anyway. So, the old behavior is better.
author: Sergei Petrunia <psergey@askmonty.org> 2019-11-13 18:53:59 +0300
committer: Sergei Petrunia <psergey@askmonty.org> 2019-11-13 18:53:59 +0300
commit: d4edb0510ec1189f65850bb47977e94ed98b1f71 (patch)
tree: c3fdffe3d21f99dda7952c63386dc62a81aa3dd4
parent: 5098d708a07f90484c9e13fe3ab58113a8a10191 (diff)
download: mariadb-git-d4edb0510ec1189f65850bb47977e94ed98b1f71.tar.gz
3 files changed, 27 insertions, 12 deletions
diff --git a/mysql-test/main/subselect_sj2_mat.result b/mysql-test/main/subselect_sj2_mat.result
index dd9f560aeed..ab3e4652deb 100644
--- a/mysql-test/main/subselect_sj2_mat.result
+++ b/mysql-test/main/subselect_sj2_mat.result
@@ -1855,18 +1855,18 @@ AND t3.id_product IN (SELECT id_product FROM t2 t2_3 WHERE t2_3.id_t2 = 18 OR t2
 AND t3.id_product IN (SELECT id_product FROM t2 t2_4 WHERE t2_4.id_t2 = 34 OR t2_4.id_t2 = 23) 
 AND t3.id_product IN (SELECT id_product FROM t2 t2_5 WHERE t2_5.id_t2 = 29 OR t2_5.id_t2 = 28 OR t2_5.id_t2 = 26);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<subquery3>	ALL	distinct_key	NULL	NULL	NULL	12	
-1	PRIMARY	t3	eq_ref	PRIMARY	PRIMARY	4	test.t2_2.id_product	1	Using where; Using index
+1	PRIMARY	t1	index	NULL	PRIMARY	8	NULL	73	Using index
+1	PRIMARY	t3	eq_ref	PRIMARY	PRIMARY	4	test.t1.id_product	1	Using index
 1	PRIMARY	<subquery5>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
 1	PRIMARY	<subquery4>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
-1	PRIMARY	<subquery6>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
+1	PRIMARY	<subquery3>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
 1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
+1	PRIMARY	t4	eq_ref	PRIMARY	PRIMARY	8	test.t1.id_product,const	1	Using where; Using index
+1	PRIMARY	<subquery6>	eq_ref	distinct_key	distinct_key	4	func	1	Using where
 1	PRIMARY	t5	ALL	NULL	NULL	NULL	NULL	18	Using where; Using join buffer (flat, BNL join)
-1	PRIMARY	t4	eq_ref	PRIMARY	PRIMARY	8	test.t3.id_product,const	1	Using where; Using index
-1	PRIMARY	t1	index	NULL	PRIMARY	8	NULL	73	Using where; Using index; Using join buffer (flat, BNL join)
-3	MATERIALIZED	t2_2	ref	id_t2,id_product	id_t2	5	const	12	Using where
 5	MATERIALIZED	t2_4	range	id_t2,id_product	id_t2	5	NULL	18	Using index condition; Using where
 4	MATERIALIZED	t2_3	range	id_t2,id_product	id_t2	5	NULL	32	Using index condition; Using where
-6	MATERIALIZED	t2_5	range	id_t2,id_product	id_t2	5	NULL	30	Using index condition; Using where
+3	MATERIALIZED	t2_2	ref	id_t2,id_product	id_t2	5	const	12	
 2	MATERIALIZED	t2_1	ref	id_t2,id_product	id_t2	5	const	50	
+6	MATERIALIZED	t2_5	range	id_t2,id_product	id_t2	5	NULL	30	Using index condition; Using where
 drop table t1,t2,t3,t4,t5;
diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result
index 6278b5a0cf5..e9a19b2a1c3 100644
--- a/mysql-test/main/subselect_sj_jcl6.result
+++ b/mysql-test/main/subselect_sj_jcl6.result
@@ -3527,8 +3527,8 @@ EXPLAIN
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
 GROUP BY a HAVING a != 'z';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	2	Using temporary; Using filesort
-1	PRIMARY	t	ref	idx_a	idx_a	4	test.t2.b	2	Using index
+1	PRIMARY	t	index	idx_a	idx_a	4	NULL	3	Using index
+1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	2	Using where
 2	MATERIALIZED	t1	ref	idx_a	idx_a	4	test.t2.b	2	Using index
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
@@ -3541,8 +3541,8 @@ EXPLAIN
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
 GROUP BY a HAVING a != 'z';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	<subquery2>	ALL	distinct_key	NULL	NULL	NULL	2	Using temporary; Using filesort
-1	PRIMARY	t	ref	idx_a	idx_a	4	test.t2.b	2	Using index
+1	PRIMARY	t	index	idx_a	idx_a	4	NULL	3	Using index
+1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	
 2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	2	Using where
 2	MATERIALIZED	t1	ref	idx_a	idx_a	4	test.t2.b	2	Using index
 SELECT a FROM t1 t WHERE a IN (SELECT b FROM t1, t2 WHERE b = a)
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index aeafc13998a..a8afd952a4d 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -3029,7 +3029,22 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
 
     *strategy= SJ_OPT_MATERIALIZE_SCAN;
     *read_time=    prefix_cost;
-    *record_count= prefix_rec_count / mat_info->rows_with_duplicates;
+    /*
+      Note: the next line means we did not remove the subquery's fanout from
+      *record_count. It needs to be removed, as the join prefix is
+
+        ntX  SJM-SCAN(it1 ... itN) | (ot1 ... otN) ...
+
+      here, the SJM-SCAN may have introduced subquery's fanout (duplicate rows,
+      rows that don't have matches in ot1_i). All this fanout is gone after
+      table otN (or earlier) but taking it into account is hard.
+
+      Some consolation here is that SJM-Scan strategy is applicable when the
+      subquery is smaller than tables otX. If the subquery has large cardinality,
+      we can greatly overestimate *record_count here, but it doesn't matter as
+      SJ-Materialization-Lookup is a better strategy anyway.
+    */
+    *record_count= prefix_rec_count;
     *handled_fanout= mat_nest->sj_inner_tables;
     return TRUE;
   }
author	Sergei Petrunia <psergey@askmonty.org>	2019-11-13 18:53:59 +0300
committer	Sergei Petrunia <psergey@askmonty.org>	2019-11-13 18:53:59 +0300
commit	d4edb0510ec1189f65850bb47977e94ed98b1f71 (patch)
tree	c3fdffe3d21f99dda7952c63386dc62a81aa3dd4
parent	5098d708a07f90484c9e13fe3ab58113a8a10191 (diff)
download	mariadb-git-d4edb0510ec1189f65850bb47977e94ed98b1f71.tar.gz