diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2021-03-05 10:55:51 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2021-03-05 15:29:45 +0530 |
commit | 218c6a6bfb3b6886df9104d6febd144e3bf5b9e1 (patch) | |
tree | 2d2e693ad7f54c57ed3ef6be47f40c55544fe159 | |
parent | d542c8d61f47ee46a09fd10a046f262ff6a08336 (diff) | |
download | mariadb-git-218c6a6bfb3b6886df9104d6febd144e3bf5b9e1.tar.gz |
Added a function to estimate the join output cardinality.
-rw-r--r-- | mysql-test/main/join_cardinality.result | 85 | ||||
-rw-r--r-- | mysql-test/main/join_cardinality.test | 58 | ||||
-rw-r--r-- | sql/sql_select.cc | 118 | ||||
-rw-r--r-- | sql/sql_select.h | 3 |
4 files changed, 225 insertions, 39 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result index e98de9c087b..4249ec27e1b 100644 --- a/mysql-test/main/join_cardinality.result +++ b/mysql-test/main/join_cardinality.result @@ -1076,4 +1076,87 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) false ] DROP TABLE t1,t2; -SET optimizer_switch=@save_optimizer_switch; +# +# Test to check the estimates of join output cardinality +# +CREATE TABLE t1(a int, b int); +INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20; +INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20; +CREATE TABLE t2(a int, b int); +INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10; +INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10; +CREATE TABLE t3(a int, b int); +INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100; +INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +ANALYZE TABLE t2 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +ANALYZE TABLE t3 PERSISTENT FOR ALL; +Table Op Msg_type Msg_text +test.t3 analyze status Engine-independent statistics collected +test.t3 analyze status OK +EXPLAIN SELECT * FROM t1,t2 +WHERE t1.a = t2.a +ORDER BY t2.b +LIMIT 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using filesort +1 SIMPLE t1 ALL NULL NULL NULL NULL 40 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +[ + 40 +] +EXPLAIN SELECT * FROM t1,t2,t3 +WHERE t1.a = t2.a AND t3.a=t2.a +ORDER BY t2.b +LIMIT 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using filesort +1 SIMPLE t1 ALL NULL NULL NULL NULL 40 Using where +1 SIMPLE t3 ALL NULL NULL NULL NULL 200 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +[ + 80 +] +EXPLAIN SELECT * FROM t1,t2,t3 +WHERE t1.a = t2.a AND t3.b=t2.b +ORDER BY t2.b +LIMIT 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t3 ALL NULL NULL NULL NULL 200 Using filesort +1 SIMPLE t2 ALL NULL NULL NULL NULL 20 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 40 Using where +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +[ + true +] +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +[ + 80 +] +DROP TABLE t1,t2,t3; diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test index 92b8a1b1c87..3c43a3223dc 100644 --- a/mysql-test/main/join_cardinality.test +++ b/mysql-test/main/join_cardinality.test @@ -5,6 +5,7 @@ SET @save_optimizer_switch=@@optimizer_switch; SET optimizer_trace=1; SET optimizer_switch='rowid_filter=off'; +set optimizer_switch='cost_based_order_by_limit=on'; CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a)); INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100; @@ -391,10 +392,63 @@ INSERT INTO t2 SELECT seq, seq from seq_1_to_10 ; ANALYZE TABLE t1 PERSISTENT FOR ALL; ANALYZE TABLE t2 PERSISTENT FOR ALL; -EXPLAIN EXTENDED SELECT * FROM t1,t2 WHERE t1.a= t2.a; +EXPLAIN EXTENDED SELECT * FROM t1,t2 WHERE t1.a= t2.a ORDER BY t1.b LIMIT 10; SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; DROP TABLE t1,t2; -SET optimizer_switch=@save_optimizer_switch; +--echo # +--echo # Test to check the estimates of join output cardinality +--echo # + +CREATE TABLE t1(a int, b int); +INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20; +INSERT INTO t1 SELECT seq-1, seq-1 from seq_1_to_20; +CREATE TABLE t2(a int, b int); +INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10; +INSERT INTO t2 SELECT seq-1, seq-1 from seq_1_to_10; +CREATE TABLE t3(a int, b int); +INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100; +INSERT INTO t3 SELECT seq-1, seq-1 from seq_1_to_100; +ANALYZE TABLE t1 PERSISTENT FOR ALL; +ANALYZE TABLE t2 PERSISTENT FOR ALL; +ANALYZE TABLE t3 PERSISTENT FOR ALL; + +let $query= +SELECT * FROM t1,t2 +WHERE t1.a = t2.a +ORDER BY t2.b +LIMIT 5; + +eval EXPLAIN $query; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +let $query= +SELECT * FROM t1,t2,t3 +WHERE t1.a = t2.a AND t3.a=t2.a +ORDER BY t2.b +LIMIT 5; + +eval EXPLAIN $query; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +let $query= +SELECT * FROM t1,t2,t3 +WHERE t1.a = t2.a AND t3.b=t2.b +ORDER BY t2.b +LIMIT 5; + +eval EXPLAIN $query; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; +SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_estimate')) +FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; + +DROP TABLE t1,t2,t3; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 7595ed4c99b..c54e3f9af0a 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8497,6 +8497,12 @@ choose_plan(JOIN *join, table_map join_tables) wrapper.add("cardinality_accurate", cardinality_accurate); if (!cardinality_accurate) join->sort_nest_possible= false; + + if (join->sort_nest_possible) + { + join->estimate_cardinality_for_join(join_tables); + wrapper.add("cardinality_estimate", join->cardinality_estimate); + } } Json_writer_array trace_plan(thd,"considered_execution_plans"); @@ -8518,10 +8524,6 @@ choose_plan(JOIN *join, table_map join_tables) /* Automatically determine a reasonable value for 'search_depth' */ search_depth= determine_search_depth(join); - if (join->sort_nest_possible && - join->estimate_cardinality_for_join(join_tables)) - DBUG_RETURN(TRUE); - if (greedy_search(join, join_tables, search_depth, prune_level, use_cond_selectivity)) DBUG_RETURN(TRUE); @@ -29785,50 +29787,96 @@ bool JOIN::is_order_by_expensive() cardinality for a join by reducing the number of permutations of join orders to be considered. We can do this by running the join planner with a small value for system variable optimizer_search_depth. - - @retval - TRUE error - FALSE otherwise */ -bool JOIN::estimate_cardinality_for_join(table_map joined_tables) +void JOIN::estimate_cardinality_for_join(table_map joined_tables) { - Json_writer_temp_disable disable_tracing(thd); - uint use_cond_selectivity; - uint search_depth= thd->variables.optimizer_search_depth; - if (search_depth == 0) - search_depth= determine_search_depth(this); - uint prune_level= thd->variables.optimizer_prune_level; - use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity; + /* + cardinality_estimate should be set to the value of the estimate join + output cardinality + */ + cardinality_estimate= 1; + JOIN_TAB *tab; + cardinality_estimate*= multi_eq_join_cond_selectivity(); + for (JOIN_TAB **pos= best_ref; (tab= *pos) ; pos++) + { + TABLE *table= tab->table; + cardinality_estimate*= (table->stat_records() * table->cond_selectivity); + } +} - TABLE *save_sort_by_table= sort_by_table; - JOIN_TAB *save_best_ref[MAX_TABLES]; - for (uint i= 0; i < table_count; i++) - save_best_ref[i]= best_ref[i]; - get_cardinality_estimate= TRUE; - cardinality_estimate= DBL_MAX; +static int cmp_double(double *a, double *b) +{ + return *a < *b ? -1 : 1; +} - if (greedy_search(this, joined_tables, search_depth, prune_level, - use_cond_selectivity)) - return TRUE; - set_if_bigger(join_record_count, 1); - cardinality_estimate= join_record_count; +double JOIN::multi_eq_join_cond_selectivity() +{ + double sel= 1.0; + uint max_size= 0; + if (!cond_equal || !cond_equal->current_level.elements) + return sel; - cur_embedding_map= 0; - reset_nj_counters(this, join_list); - cur_sj_inner_tables= 0; - get_cardinality_estimate= FALSE; - sort_by_table= save_sort_by_table; + Item_equal *item_equal; + List_iterator_fast<Item_equal> it(cond_equal->current_level); + while ((item_equal= it++)) + { + set_if_bigger(max_size, item_equal->elements_count()); + } - for (uint i= 0; i < table_count; i++) - best_ref[i]= save_best_ref[i]; + double *ndv_for_cols= new double[max_size]; + it.rewind(); + while ((item_equal= it++)) + { + uint size= item_equal->elements_count(); + /* + If the multiple equality involves a constant, then the selectivity for + all the predicates is already taken into account in + TABLE::cond_selectivity + */ + if (item_equal->get_const()) + continue; - return FALSE; + /* + The formula used to calculate the selectivity for an equi-join + predicate(col1=col2) is: + selectivity(col1=col2) = 1/max(ndv(col1), ndv(col2)); + + The selectivity of the entire multiple equality like + col1=col2 = ............colN is: + 1 / ( ndv(col2) * ndv(col3) * ... * ndv(colN) ) + + Assuming that + ndv(col1) <= ndv(col2) <= ... <=ndv(colN) + */ + uint idx=0; + Item_equal_fields_iterator fi(*item_equal); + while ((fi++)) + { + double curr_eq_fld_sel; + Field *fld= fi.get_curr_field(); + curr_eq_fld_sel= get_column_avg_frequency(fld) / + fld->table->stat_records(); + ndv_for_cols[idx++]= curr_eq_fld_sel; + } + + my_qsort(ndv_for_cols, size, sizeof(double), (qsort_cmp)cmp_double); + + idx= 0; + while (idx < size-1) + { + sel*= ndv_for_cols[idx++]; + } + } + + delete [] ndv_for_cols; + return sel; } + /* @brief Re-setup accesses that use index on the first non-const table for ordering diff --git a/sql/sql_select.h b/sql/sql_select.h index 825016e8d4a..db24a9f0aa9 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1955,7 +1955,8 @@ public: bool sort_nest_allowed(); bool is_order_by_expensive(); - bool estimate_cardinality_for_join(table_map joined_tables); + void estimate_cardinality_for_join(table_map joined_tables); + double multi_eq_join_cond_selectivity(); bool check_if_sort_nest_present(uint* n_tables, table_map *tables_map); bool create_sort_nest_info(uint n_tables, table_map nest_tables_map); bool remove_const_from_order_by(); |