summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVarun Gupta <varun.gupta@mariadb.com>2021-03-05 10:55:51 +0530
committerVarun Gupta <varun.gupta@mariadb.com>2021-03-05 15:29:45 +0530
commit218c6a6bfb3b6886df9104d6febd144e3bf5b9e1 (patch)
tree2d2e693ad7f54c57ed3ef6be47f40c55544fe159
parentd542c8d61f47ee46a09fd10a046f262ff6a08336 (diff)
downloadmariadb-git-218c6a6bfb3b6886df9104d6febd144e3bf5b9e1.tar.gz
Added a function to estimate the join output cardinality.
-rw-r--r--mysql-test/main/join_cardinality.result85
-rw-r--r--mysql-test/main/join_cardinality.test58
-rw-r--r--sql/sql_select.cc118
-rw-r--r--sql/sql_select.h3
4 files changed, 225 insertions, 39 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
index e98de9c087b..4249ec27e1b 100644
--- a/mysql-test/main/join_cardinality.result
+++ b/mysql-test/main/join_cardinality.result
@@ -1076,4 +1076,87 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
false
]
DROP TABLE t1,t2;
-SET optimizer_switch=@save_optimizer_switch;
+#
+# Test to check the estimates of join output cardinality
+#
+CREATE TABLE t1(a int, b int);
+INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20;
+INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20;
+CREATE TABLE t2(a int, b int);
+INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10;
+INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10;
+CREATE TABLE t3(a int, b int);
+INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100;
+INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+ANALYZE TABLE t3 PERSISTENT FOR ALL;
+Table Op Msg_type Msg_text
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status OK
+EXPLAIN SELECT * FROM t1,t2
+WHERE t1.a = t2.a
+ORDER BY t2.b
+LIMIT 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using filesort
+1 SIMPLE t1 ALL NULL NULL NULL NULL 40 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+[
+ 40
+]
+EXPLAIN SELECT * FROM t1,t2,t3
+WHERE t1.a = t2.a AND t3.a=t2.a
+ORDER BY t2.b
+LIMIT 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using filesort
+1 SIMPLE t1 ALL NULL NULL NULL NULL 40 Using where
+1 SIMPLE t3 ALL NULL NULL NULL NULL 200 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+[
+ 80
+]
+EXPLAIN SELECT * FROM t1,t2,t3
+WHERE t1.a = t2.a AND t3.b=t2.b
+ORDER BY t2.b
+LIMIT 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t3 ALL NULL NULL NULL NULL 200 Using filesort
+1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using where
+1 SIMPLE t1 ALL NULL NULL NULL NULL 40 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true
+]
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+[
+ 80
+]
+DROP TABLE t1,t2,t3;
diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test
index 92b8a1b1c87..3c43a3223dc 100644
--- a/mysql-test/main/join_cardinality.test
+++ b/mysql-test/main/join_cardinality.test
@@ -5,6 +5,7 @@
SET @save_optimizer_switch=@@optimizer_switch;
SET optimizer_trace=1;
SET optimizer_switch='rowid_filter=off';
+set optimizer_switch='cost_based_order_by_limit=on';
CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a));
INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
@@ -391,10 +392,63 @@ INSERT INTO t2 SELECT seq, seq from seq_1_to_10 ;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
ANALYZE TABLE t2 PERSISTENT FOR ALL;
-EXPLAIN EXTENDED SELECT * FROM t1,t2 WHERE t1.a= t2.a;
+EXPLAIN EXTENDED SELECT * FROM t1,t2 WHERE t1.a= t2.a ORDER BY t1.b LIMIT 10;
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
DROP TABLE t1,t2;
-SET optimizer_switch=@save_optimizer_switch;
+--echo #
+--echo # Test to check the estimates of join output cardinality
+--echo #
+
+CREATE TABLE t1(a int, b int);
+INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20;
+INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20;
+CREATE TABLE t2(a int, b int);
+INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10;
+INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10;
+CREATE TABLE t3(a int, b int);
+INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100;
+INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+ANALYZE TABLE t3 PERSISTENT FOR ALL;
+
+let $query=
+SELECT * FROM t1,t2
+WHERE t1.a = t2.a
+ORDER BY t2.b
+LIMIT 5;
+
+eval EXPLAIN $query;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+let $query=
+SELECT * FROM t1,t2,t3
+WHERE t1.a = t2.a AND t3.a=t2.a
+ORDER BY t2.b
+LIMIT 5;
+
+eval EXPLAIN $query;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+let $query=
+SELECT * FROM t1,t2,t3
+WHERE t1.a = t2.a AND t3.b=t2.b
+ORDER BY t2.b
+LIMIT 5;
+
+eval EXPLAIN $query;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+DROP TABLE t1,t2,t3;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7595ed4c99b..c54e3f9af0a 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8497,6 +8497,12 @@ choose_plan(JOIN *join, table_map join_tables)
wrapper.add("cardinality_accurate", cardinality_accurate);
if (!cardinality_accurate)
join->sort_nest_possible= false;
+
+ if (join->sort_nest_possible)
+ {
+ join->estimate_cardinality_for_join(join_tables);
+ wrapper.add("cardinality_estimate", join->cardinality_estimate);
+ }
}
Json_writer_array trace_plan(thd,"considered_execution_plans");
@@ -8518,10 +8524,6 @@ choose_plan(JOIN *join, table_map join_tables)
/* Automatically determine a reasonable value for 'search_depth' */
search_depth= determine_search_depth(join);
- if (join->sort_nest_possible &&
- join->estimate_cardinality_for_join(join_tables))
- DBUG_RETURN(TRUE);
-
if (greedy_search(join, join_tables, search_depth, prune_level,
use_cond_selectivity))
DBUG_RETURN(TRUE);
@@ -29785,50 +29787,96 @@ bool JOIN::is_order_by_expensive()
cardinality for a join by reducing the number of permutations of
join orders to be considered. We can do this by running the join planner
with a small value for system variable optimizer_search_depth.
-
- @retval
- TRUE error
- FALSE otherwise
*/
-bool JOIN::estimate_cardinality_for_join(table_map joined_tables)
+void JOIN::estimate_cardinality_for_join(table_map joined_tables)
{
- Json_writer_temp_disable disable_tracing(thd);
- uint use_cond_selectivity;
- uint search_depth= thd->variables.optimizer_search_depth;
- if (search_depth == 0)
- search_depth= determine_search_depth(this);
- uint prune_level= thd->variables.optimizer_prune_level;
- use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity;
+ /*
+ cardinality_estimate should be set to the value of the estimate join
+ output cardinality
+ */
+ cardinality_estimate= 1;
+ JOIN_TAB *tab;
+ cardinality_estimate*= multi_eq_join_cond_selectivity();
+ for (JOIN_TAB **pos= best_ref; (tab= *pos) ; pos++)
+ {
+ TABLE *table= tab->table;
+ cardinality_estimate*= (table->stat_records() * table->cond_selectivity);
+ }
+}
- TABLE *save_sort_by_table= sort_by_table;
- JOIN_TAB *save_best_ref[MAX_TABLES];
- for (uint i= 0; i < table_count; i++)
- save_best_ref[i]= best_ref[i];
- get_cardinality_estimate= TRUE;
- cardinality_estimate= DBL_MAX;
+static int cmp_double(double *a, double *b)
+{
+ return *a < *b ? -1 : 1;
+}
- if (greedy_search(this, joined_tables, search_depth, prune_level,
- use_cond_selectivity))
- return TRUE;
- set_if_bigger(join_record_count, 1);
- cardinality_estimate= join_record_count;
+double JOIN::multi_eq_join_cond_selectivity()
+{
+ double sel= 1.0;
+ uint max_size= 0;
+ if (!cond_equal || !cond_equal->current_level.elements)
+ return sel;
- cur_embedding_map= 0;
- reset_nj_counters(this, join_list);
- cur_sj_inner_tables= 0;
- get_cardinality_estimate= FALSE;
- sort_by_table= save_sort_by_table;
+ Item_equal *item_equal;
+ List_iterator_fast<Item_equal> it(cond_equal->current_level);
+ while ((item_equal= it++))
+ {
+ set_if_bigger(max_size, item_equal->elements_count());
+ }
- for (uint i= 0; i < table_count; i++)
- best_ref[i]= save_best_ref[i];
+ double *ndv_for_cols= new double[max_size];
+ it.rewind();
+ while ((item_equal= it++))
+ {
+ uint size= item_equal->elements_count();
+ /*
+ If the multiple equality involves a constant, then the selectivity for
+ all the predicates is already taken into account in
+ TABLE::cond_selectivity
+ */
+ if (item_equal->get_const())
+ continue;
- return FALSE;
+ /*
+ The formula used to calculate the selectivity for an equi-join
+ predicate(col1=col2) is:
+ selectivity(col1=col2) = 1/max(ndv(col1), ndv(col2));
+
+ The selectivity of the entire multiple equality like
+ col1=col2 = ............colN is:
+ 1 / ( ndv(col2) * ndv(col3) * ... * ndv(colN) )
+
+ Assuming that
+ ndv(col1) <= ndv(col2) <= ... <=ndv(colN)
+ */
+ uint idx=0;
+ Item_equal_fields_iterator fi(*item_equal);
+ while ((fi++))
+ {
+ double curr_eq_fld_sel;
+ Field *fld= fi.get_curr_field();
+ curr_eq_fld_sel= get_column_avg_frequency(fld) /
+ fld->table->stat_records();
+ ndv_for_cols[idx++]= curr_eq_fld_sel;
+ }
+
+ my_qsort(ndv_for_cols, size, sizeof(double), (qsort_cmp)cmp_double);
+
+ idx= 0;
+ while (idx < size-1)
+ {
+ sel*= ndv_for_cols[idx++];
+ }
+ }
+
+ delete [] ndv_for_cols;
+ return sel;
}
+
/*
@brief
Re-setup accesses that use index on the first non-const table for ordering
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 825016e8d4a..db24a9f0aa9 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1955,7 +1955,8 @@ public:
bool sort_nest_allowed();
bool is_order_by_expensive();
- bool estimate_cardinality_for_join(table_map joined_tables);
+ void estimate_cardinality_for_join(table_map joined_tables);
+ double multi_eq_join_cond_selectivity();
bool check_if_sort_nest_present(uint* n_tables, table_map *tables_map);
bool create_sort_nest_info(uint n_tables, table_map nest_tables_map);
bool remove_const_from_order_by();