summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/main/selectivity.result77
-rw-r--r--mysql-test/main/selectivity.test85
-rw-r--r--mysql-test/main/selectivity_innodb.result75
-rw-r--r--mysql-test/main/selectivity_no_engine.result6
-rw-r--r--sql/sql_statistics.cc44
5 files changed, 234 insertions, 53 deletions
diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result
index 0b922609916..7d7343847cd 100644
--- a/mysql-test/main/selectivity.result
+++ b/mysql-test/main/selectivity.result
@@ -834,7 +834,7 @@ flush table t1;
set optimizer_use_condition_selectivity=4;
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 0.39 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 0.78 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 0
drop table t1;
@@ -1649,7 +1649,7 @@ test.t1 analyze status Table is already up to date
# Check what info the optimizer has about selectivities
explain extended select * from t1 use index () where a in (17,51,5);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 3.90 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 3.91 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` USE INDEX () where `test`.`t1`.`a` in (17,51,5)
explain extended select * from t1 use index () where b=2;
@@ -1935,9 +1935,78 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 25.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
+DROP TABLE t1;
+# End of 10.2 tests
+#
+# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+#
+create table t0(a int);
+insert into t0 select 1 from seq_1_to_78;
+create table t1(a int);
+insert into t1 select 1 from seq_1_to_26;
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+insert into t10 select 34785 from t0, seq_1_to_8;
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+insert into t10 select 65216 from t0, seq_1_to_52;
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+insert into t10 select 65219 from t0;
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+insert into t10 select 73915 from t0, seq_1_to_40;
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+insert into t10 select 91302 from t0, seq_1_to_6;
+insert into t10 select 91303 from t1;
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+insert into t10 select 91305 from t0, seq_1_to_8;
+insert into t10 select 99998 from t1;
+insert into t10 select 99999 from t1;
+insert into t10 select 100000 from t1;
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status OK
+flush tables;
+set @tmp=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t10 where a in (91303);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10 ALL NULL NULL NULL NULL 9984 Using where
+# Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+from information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+[
+ [
+ {
+ "column_name": "a",
+ "ranges":
+ ["91303 <= a <= 91303"],
+ "selectivity_from_histogram": 0.0357
+ }
+ ]
+]
+set optimizer_trace=@tmp;
+drop table t0,t1,t10;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
-DROP TABLE t1;
-# End of 10.2 tests
set @@global.histogram_size=@save_histogram_size;
diff --git a/mysql-test/main/selectivity.test b/mysql-test/main/selectivity.test
index 9c82a8d37a7..06a3e32da95 100644
--- a/mysql-test/main/selectivity.test
+++ b/mysql-test/main/selectivity.test
@@ -1319,14 +1319,93 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
FLUSH TABLES;
EXPLAIN EXTENDED SELECT * FROM t1 WHERE a=2;
-set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
-set histogram_size=@save_histogram_size;
-set use_stat_tables= @save_use_stat_tables;
DROP TABLE t1;
--echo # End of 10.2 tests
+--echo #
+--echo # MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+--echo #
+create table t0(a int); # This holds how many rows we hold in a bucket.
+insert into t0 select 1 from seq_1_to_78;
+
+create table t1(a int); # one-third of a bucket
+insert into t1 select 1 from seq_1_to_26;
+
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+
+
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+
+
+insert into t10 select 34785 from t0, seq_1_to_8;
+
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+
+insert into t10 select 65216 from t0, seq_1_to_52;
+
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+
+insert into t10 select 65219 from t0;
+
+
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+
+insert into t10 select 73915 from t0, seq_1_to_40;
+
+
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+
+insert into t10 select 91302 from t0, seq_1_to_6;
+
+insert into t10 select 91303 from t1; # Only 1/3rd of bucket matches the search tuple
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+
+insert into t10 select 91305 from t0, seq_1_to_8;
+
+insert into t10 select 99998 from t1;
+insert into t10 select 99999 from t1;
+insert into t10 select 100000 from t1;
+
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+flush tables;
+
+set @tmp=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t10 where a in (91303);
+
+--echo # Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+from information_schema.optimizer_trace;
+
+set optimizer_trace=@tmp;
+drop table t0,t1,t10;
+
+set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set histogram_size=@save_histogram_size;
+set use_stat_tables= @save_use_stat_tables;
#
# Clean up
#
diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result
index 4ee2da02ec6..dfba12f2b05 100644
--- a/mysql-test/main/selectivity_innodb.result
+++ b/mysql-test/main/selectivity_innodb.result
@@ -843,7 +843,7 @@ flush table t1;
set optimizer_use_condition_selectivity=4;
explain extended select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 0.39 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1025 0.78 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 0
drop table t1;
@@ -1659,7 +1659,7 @@ test.t1 analyze status OK
# Check what info the optimizer has about selectivities
explain extended select * from t1 use index () where a in (17,51,5);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 3.90 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 3.91 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` USE INDEX () where `test`.`t1`.`a` in (17,51,5)
explain extended select * from t1 use index () where b=2;
@@ -1945,11 +1945,78 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 25.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`a` = 2
+DROP TABLE t1;
+# End of 10.2 tests
+#
+# MDEV-31067: selectivity_from_histogram >1.0 for a DOUBLE_PREC_HB histogram
+#
+create table t0(a int);
+insert into t0 select 1 from seq_1_to_78;
+create table t1(a int);
+insert into t1 select 1 from seq_1_to_26;
+create table t10 (a int);
+insert into t10 select 0 from t0, seq_1_to_4;
+insert into t10 select 8693 from t1;
+insert into t10 select 8694 from t1;
+insert into t10 select 8695 from t1;
+insert into t10 select 34783 from t1;
+insert into t10 select 34784 from t1;
+insert into t10 select 34785 from t1;
+insert into t10 select 34785 from t0, seq_1_to_8;
+insert into t10 select 65214 from t1;
+insert into t10 select 65215 from t1;
+insert into t10 select 65216 from t1;
+insert into t10 select 65216 from t0, seq_1_to_52;
+insert into t10 select 65217 from t1;
+insert into t10 select 65218 from t1;
+insert into t10 select 65219 from t1;
+insert into t10 select 65219 from t0;
+insert into t10 select 73913 from t1;
+insert into t10 select 73914 from t1;
+insert into t10 select 73915 from t1;
+insert into t10 select 73915 from t0, seq_1_to_40;
+insert into t10 select 78257 from t1;
+insert into t10 select 78258 from t1;
+insert into t10 select 78259 from t1;
+insert into t10 select 91300 from t1;
+insert into t10 select 91301 from t1;
+insert into t10 select 91302 from t1;
+insert into t10 select 91302 from t0, seq_1_to_6;
+insert into t10 select 91303 from t1;
+insert into t10 select 91304 from t1;
+insert into t10 select 91305 from t1;
+insert into t10 select 91305 from t0, seq_1_to_8;
+insert into t10 select 99998 from t1;
+insert into t10 select 99999 from t1;
+insert into t10 select 100000 from t1;
+set use_stat_tables=preferably;
+analyze table t10 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status OK
+flush tables;
+set statement optimizer_trace=1 for
+explain select * from t10 where a in (91303);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t10 ALL NULL NULL NULL NULL 9984 Using where
+# Must have selectivity_from_histogram <= 1.0:
+select json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+from information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.selectivity_for_columns'))
+[
+ [
+ {
+ "column_name": "a",
+ "ranges":
+ ["91303 <= a <= 91303"],
+ "selectivity_from_histogram": 0.035714283
+ }
+ ]
+]
+drop table t0,t1,t10;
set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set histogram_size=@save_histogram_size;
set use_stat_tables= @save_use_stat_tables;
-DROP TABLE t1;
-# End of 10.2 tests
set @@global.histogram_size=@save_histogram_size;
set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
set @tmp_ust= @@use_stat_tables;
diff --git a/mysql-test/main/selectivity_no_engine.result b/mysql-test/main/selectivity_no_engine.result
index b6830e91f61..9ecf2eea23a 100644
--- a/mysql-test/main/selectivity_no_engine.result
+++ b/mysql-test/main/selectivity_no_engine.result
@@ -36,12 +36,12 @@ test.t2 analyze status OK
# The following two must have the same in 'Extra' column:
explain extended select * from t2 where col1 IN (20, 180);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (20,180)
explain extended select * from t2 where col1 IN (180, 20);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.35 Using where
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1100 1.00 Using where
Warnings:
Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where `test`.`t2`.`col1` in (180,20)
drop table t1, t2;
@@ -102,7 +102,7 @@ test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
explain extended select * from t1 where col1 in (1,2,3);
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 3.37 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10000 2.97 Using where
Warnings:
Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where `test`.`t1`.`col1` in (1,2,3)
# Must not cause fp division by zero, or produce nonsense numbers:
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index c7f6d2ff489..f27da375562 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3902,50 +3902,16 @@ double Histogram::point_selectivity(double pos, double avg_sel)
}
else
{
- /*
+ /*
The value 'pos' fits within one single histogram bucket.
- Histogram buckets have the same numbers of rows, but they cover
- different ranges of values.
-
- We assume that values are uniformly distributed across the [0..1] value
- range.
- */
-
- /*
- If all buckets covered value ranges of the same size, the width of
- value range would be:
+ We also have avg_sel which is per-table average selectivity of col=const.
+ If there are popular values, this may be larger than one bucket, so
+ cap the returned number by the selectivity of one bucket.
*/
double avg_bucket_width= 1.0 / (get_width() + 1);
-
- /*
- Let's see what is the width of value range that our bucket is covering.
- (min==max currently. they are kept in the formula just in case we
- will want to extend it to handle multi-bucket case)
- */
- double inv_prec_factor= (double) 1.0 / prec_factor();
- double current_bucket_width=
- (max + 1 == get_width() ? 1.0 : (get_value(max) * inv_prec_factor)) -
- (min == 0 ? 0.0 : (get_value(min-1) * inv_prec_factor));
-
- DBUG_ASSERT(current_bucket_width); /* We shouldn't get a one zero-width bucket */
-
- /*
- So:
- - each bucket has the same #rows
- - values are unformly distributed across the [min_value,max_value] domain.
- If a bucket has value range that's N times bigger then average, than
- each value will have to have N times fewer rows than average.
- */
- sel= avg_sel * avg_bucket_width / current_bucket_width;
-
- /*
- (Q: if we just follow this proportion we may end up in a situation
- where number of different values we expect to find in this bucket
- exceeds the number of rows that this histogram has in a bucket. Are
- we ok with this or we would want to have certain caps?)
- */
+ sel= MY_MIN(avg_bucket_width, avg_sel);
}
return sel;
}