MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogrambb-10.6-mdev31067-variant3

Variant #3. When Histogram::point_selectivity() sees that the point value of interest falls into one bucket, it tries to guess whether the bucket has many different (unpopular) values or a few popular values. (The number of rows is fixed, as it's a Height-balanced histogram). The basis for this guess is the "width" of the value range the bucket covers. Buckets covering wider value ranges are assumed to contain values with proportionally lower frequencies. This is just a [brave] guesswork. For a very narrow bucket, it may produce an estimate that's larger than total #rows in the bucket or even in the whole table. Remove it and replace with another approach: compute the point selectivity by assuming that the number of rows that matches a point column=val condition is the average #rows for non-common value. A value is non-common if it takes less than one whole histogram bucket.
author: Sergei Petrunia <sergey@mariadb.com> 2023-04-27 18:07:55 +0300
committer: Sergei Petrunia <sergey@mariadb.com> 2023-04-27 20:08:16 +0300
commit: 5285832b7810a9bee4f1c36ac0a4caff113ae949 (patch)
tree: 02b4c144ba2792b5f0cc1f0bd8cda081fe8e6785 /mysql-test
parent: 8f87023d3f3fbaad4e33991713db884cbe052fbc (diff)
download: mariadb-git-bb-10.6-mdev31067-variant3.tar.gz
4 files changed, 225 insertions, 14 deletions
diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result
index b5a48d341ae..6159186644b 100644
--- a/mysql-test/main/selectivity.result
+++ b/mysql-test/main/selectivity.result
@@ -834,7 +834,7 @@ flush table t1;
 set optimizer_use_condition_selectivity=4;
 explain extended select * from t1 where a=0;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	0.39	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	0.79	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 0
 drop table t1;
@@ -1649,7 +1649,7 @@ test.t1	analyze	status	Table is already up to date
 # Check what info the optimizer has about selectivities
 explain extended select * from t1 use index () where a in (17,51,5);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	3.90	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	3.91	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` USE INDEX () where `test`.`t1`.`a` in (17,51,5)
 explain extended select * from t1 use index () where b=2;
@@ -1941,9 +1941,76 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	25.00	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
+DROP TABLE t1;
+# End of 10.2 tests
+#
+# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+#
+create table t0(a int);
+insert into t0 select 1 from seq_1_to_78;
+create table t1(a int);
+insert into t1 select 1 from seq_1_to_26;
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+insert into t10 select 34785 from t0, seq_1_to_8;
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+insert into t10 select 65216 from t0, seq_1_to_52;
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+insert into t10 select 65219 from t0;
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+insert into t10 select 73915 from t0, seq_1_to_40;
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+insert into t10 select 91302 from t0, seq_1_to_6;
+insert into t10 select 91303 from t1;
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+insert into t10 select 91305 from t0, seq_1_to_8;
+insert into t10 select  99998 from t1;
+insert into t10 select  99999 from t1;
+insert into t10 select 100000 from t1;
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	OK
+flush tables;
+set statement optimizer_trace=1 for
+explain select * from t10  where a in (91303);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	NULL	NULL	NULL	NULL	9984	Using where
+# Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) 
+from information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+[
+    [
+        {
+            "column_name": "a",
+            "ranges": 
+            ["91303 <= a <= 91303"],
+            "selectivity_from_histogram": 0.035714283
+        }
+    ]
+]
+drop table t0,t1,t10;
 set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
 set histogram_size=@save_histogram_size;
 set use_stat_tables= @save_use_stat_tables;
-DROP TABLE t1;
-# End of 10.2 tests
 set @@global.histogram_size=@save_histogram_size;
diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test
index 4e4513d09d6..556ffd76a22 100644
--- a/mysql-test/main/selectivity.test
+++ b/mysql-test/main/selectivity.test
@@ -1321,14 +1321,91 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
 FLUSH TABLES;
 
 EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
-set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
-set histogram_size=@save_histogram_size;
-set use_stat_tables= @save_use_stat_tables;
 
 DROP TABLE t1;
 
 --echo # End of 10.2 tests
 
+--echo #
+--echo # MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+--echo #
+create table t0(a int);  # This holds how many rows we hold in a bucket.
+insert into t0 select 1 from seq_1_to_78;
+
+create table t1(a int);  # one-third of a bucket
+insert into t1 select 1 from seq_1_to_26;
+
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+
+
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+
+
+insert into t10 select 34785 from t0, seq_1_to_8;
+
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+
+insert into t10 select 65216 from t0, seq_1_to_52;
+
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+
+insert into t10 select 65219 from t0;
+
+
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+
+insert into t10 select 73915 from t0, seq_1_to_40;
+
+
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+
+insert into t10 select 91302 from t0, seq_1_to_6;
+
+insert into t10 select 91303 from t1; # Only 1/3rd of bucket matches the search tuple
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+
+insert into t10 select 91305 from t0, seq_1_to_8;
+
+insert into t10 select  99998 from t1;
+insert into t10 select  99999 from t1;
+insert into t10 select 100000 from t1;
+
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+flush tables;
+set statement optimizer_trace=1 for
+explain select * from t10  where a in (91303);
+
+#select * from information_schema.optimizer_trace;
+--echo # Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) 
+from information_schema.optimizer_trace;
+
+drop table t0,t1,t10;
+
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set histogram_size=@save_histogram_size;
+set use_stat_tables= @save_use_stat_tables;
 #
 # Clean up
 #
diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result
index 3edf6d8e929..fd0cea27a81 100644
--- a/mysql-test/main/selectivity_innodb.result
+++ b/mysql-test/main/selectivity_innodb.result
@@ -845,7 +845,7 @@ flush table t1;
 set optimizer_use_condition_selectivity=4;
 explain extended select * from t1 where a=0;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	0.39	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1025	0.79	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 0
 drop table t1;
@@ -1661,7 +1661,7 @@ test.t1	analyze	status	OK
 # Check what info the optimizer has about selectivities
 explain extended select * from t1 use index () where a in (17,51,5);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	3.90	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	1000	3.91	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` USE INDEX () where `test`.`t1`.`a` in (17,51,5)
 explain extended select * from t1 use index () where b=2;
@@ -1953,11 +1953,78 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	25.00	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
+DROP TABLE t1;
+# End of 10.2 tests
+#
+# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+#
+create table t0(a int);
+insert into t0 select 1 from seq_1_to_78;
+create table t1(a int);
+insert into t1 select 1 from seq_1_to_26;
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+insert into t10 select 34785 from t0, seq_1_to_8;
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+insert into t10 select 65216 from t0, seq_1_to_52;
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+insert into t10 select 65219 from t0;
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+insert into t10 select 73915 from t0, seq_1_to_40;
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+insert into t10 select 91302 from t0, seq_1_to_6;
+insert into t10 select 91303 from t1;
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+insert into t10 select 91305 from t0, seq_1_to_8;
+insert into t10 select  99998 from t1;
+insert into t10 select  99999 from t1;
+insert into t10 select 100000 from t1;
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	OK
+flush tables;
+set statement optimizer_trace=1 for
+explain select * from t10  where a in (91303);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t10	ALL	NULL	NULL	NULL	NULL	9984	Using where
+# Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns')) 
+from information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+[
+    [
+        {
+            "column_name": "a",
+            "ranges": 
+            ["91303 <= a <= 91303"],
+            "selectivity_from_histogram": 0.035714283
+        }
+    ]
+]
+drop table t0,t1,t10;
 set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
 set histogram_size=@save_histogram_size;
 set use_stat_tables= @save_use_stat_tables;
-DROP TABLE t1;
-# End of 10.2 tests
 set @@global.histogram_size=@save_histogram_size;
 set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
 set @tmp_ust= @@use_stat_tables;
diff --git a/mysql-test/main/selectivity_no_engine.result b/mysql-test/main/selectivity_no_engine.result
index 3811b12a1be..9cb1a54e0b1 100644
--- a/mysql-test/main/selectivity_no_engine.result
+++ b/mysql-test/main/selectivity_no_engine.result
@@ -36,12 +36,12 @@ test.t2	analyze	status	OK
 # The following two must have the same in 'Extra' column:
 explain extended select * from t2 where col1 IN (20, 180);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	2.00	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (20,180)
 explain extended select * from t2 where col1 IN (180, 20);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	1.35	Using where
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	1100	2.00	Using where
 Warnings:
 Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (180,20)
 drop table t1, t2;
@@ -102,7 +102,7 @@ test.t1	analyze	status	Engine-independent statistics collected
 test.t1	analyze	status	OK
 explain extended select * from t1 where col1 in (1,2,3);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.37	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	10000	3.00	Using where
 Warnings:
 Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` in (1,2,3)
 # Must not cause fp division by zero, or produce nonsense numbers:
author	Sergei Petrunia <sergey@mariadb.com>	2023-04-27 18:07:55 +0300
committer	Sergei Petrunia <sergey@mariadb.com>	2023-04-27 20:08:16 +0300
commit	5285832b7810a9bee4f1c36ac0a4caff113ae949 (patch)
tree	02b4c144ba2792b5f0cc1f0bd8cda081fe8e6785 /mysql-test
parent	8f87023d3f3fbaad4e33991713db884cbe052fbc (diff)
download	mariadb-git-bb-10.6-mdev31067-variant3.tar.gz