diff options
-rw-r--r-- | mysql-test/main/opt_trace.result | 6 | ||||
-rw-r--r-- | mysql-test/main/rowid_filter.result | 114 | ||||
-rw-r--r-- | mysql-test/main/select.result | 2 | ||||
-rw-r--r-- | mysql-test/main/select_jcl6.result | 2 | ||||
-rw-r--r-- | mysql-test/main/select_pkeycache.result | 2 | ||||
-rw-r--r-- | mysql-test/main/selectivity.result | 4 | ||||
-rw-r--r-- | sql/sql_select.cc | 132 |
7 files changed, 122 insertions, 140 deletions
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 6a6032e5271..d575c3d8a40 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -1020,7 +1020,6 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b { "index": "a", "used_range_estimates": false, "cause": "not available", - "rowid_filter_skipped": "cost_factor <= 0", "rows": 1, "cost": 200.0585794, "chosen": true @@ -1077,7 +1076,6 @@ explain select * from t1,t2 where t1.a=t2.b+2 and t2.a= t1.b { "index": "a", "used_range_estimates": false, "cause": "not available", - "rowid_filter_skipped": "cost_factor <= 0", "rows": 1, "cost": 200.0585794, "chosen": true @@ -4030,7 +4028,6 @@ explain delete t0,t1 from t0, t1 where t0.a=t1.a and t1.a<3 { "index": "a", "used_range_estimates": false, "cause": "not better than ref estimates", - "rowid_filter_skipped": "cost_factor <= 0", "rows": 1, "cost": 3.001757383, "chosen": true @@ -8108,7 +8105,6 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) "index": "b", "used_range_estimates": false, "cause": "not available", - "rowid_filter_skipped": "cost_factor <= 0", "rows": 1, "cost": 20.00585794, "chosen": true @@ -8311,7 +8307,6 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) "index": "a", "used_range_estimates": false, "cause": "not available", - "rowid_filter_skipped": "cost_factor <= 0", "rows": 1, "cost": 20.00585794, "chosen": true @@ -8379,7 +8374,6 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) "index": "a", "used_range_estimates": false, "cause": "not available", - "rowid_filter_skipped": "cost_factor <= 0", "rows": 1, "cost": 200.0585794, "chosen": true diff --git a/mysql-test/main/rowid_filter.result b/mysql-test/main/rowid_filter.result index 9860b2e9ad3..e32b738c4e5 100644 --- a/mysql-test/main/rowid_filter.result +++ b/mysql-test/main/rowid_filter.result @@ -337,8 +337,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using index condition -1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (5%) Using where; Using rowid filter +1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 69 Using index condition +1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (2%) Using where; Using rowid filter set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND @@ -348,40 +348,40 @@ EXPLAIN "query_block": { "select_id": 1, "table": { - "table_name": "lineitem", + "table_name": "orders", "access_type": "range", + "possible_keys": ["PRIMARY", "i_o_totalprice"], + "key": "i_o_totalprice", + "key_length": "9", + "used_key_parts": ["o_totalprice"], + "rows": 69, + "filtered": 100, + "index_condition": "orders.o_totalprice between 200000 and 230000" + }, + "table": { + "table_name": "lineitem", + "access_type": "ref", "possible_keys": [ "PRIMARY", "i_l_shipdate", "i_l_orderkey", "i_l_orderkey_quantity" ], - "key": "i_l_shipdate", - "key_length": "4", - "used_key_parts": ["l_shipDATE"], - "rows": 98, - "filtered": 100, - "index_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" - }, - "table": { - "table_name": "orders", - "access_type": "eq_ref", - "possible_keys": ["PRIMARY", "i_o_totalprice"], - "key": "PRIMARY", + "key": "i_l_orderkey", "key_length": "4", - "used_key_parts": ["o_orderkey"], - "ref": ["dbt3_s001.lineitem.l_orderkey"], + "used_key_parts": ["l_orderkey"], + "ref": ["dbt3_s001.orders.o_orderkey"], "rowid_filter": { "range": { - "key": "i_o_totalprice", - "used_key_parts": ["o_totalprice"] + "key": "i_l_shipdate", + "used_key_parts": ["l_shipDATE"] }, - "rows": 69, - "selectivity_pct": 4.6 + "rows": 98, + "selectivity_pct": 1.631973356 }, - "rows": 1, - "filtered": 4.599999905, - "attached_condition": "orders.o_totalprice between 200000 and 230000" + "rows": 4, + "filtered": 1.631973386, + "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" } } } @@ -390,8 +390,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using index condition -1 SIMPLE orders eq_ref|filter PRIMARY,i_o_totalprice PRIMARY|i_o_totalprice 4|9 dbt3_s001.lineitem.l_orderkey 1 (5%) 0.11 (10%) 4.60 100.00 Using where; Using rowid filter +1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 69 71.00 100.00 100.00 Using index condition +1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (2%) 0.15 (2%) 1.63 100.00 Using where; Using rowid filter set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND @@ -403,55 +403,55 @@ ANALYZE "r_loops": 1, "r_total_time_ms": "REPLACED", "table": { - "table_name": "lineitem", + "table_name": "orders", "access_type": "range", - "possible_keys": [ - "PRIMARY", - "i_l_shipdate", - "i_l_orderkey", - "i_l_orderkey_quantity" - ], - "key": "i_l_shipdate", - "key_length": "4", - "used_key_parts": ["l_shipDATE"], + "possible_keys": ["PRIMARY", "i_o_totalprice"], + "key": "i_o_totalprice", + "key_length": "9", + "used_key_parts": ["o_totalprice"], "r_loops": 1, - "rows": 98, - "r_rows": 98, + "rows": 69, + "r_rows": 71, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", "filtered": 100, "r_filtered": 100, - "index_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" + "index_condition": "orders.o_totalprice between 200000 and 230000" }, "table": { - "table_name": "orders", - "access_type": "eq_ref", - "possible_keys": ["PRIMARY", "i_o_totalprice"], - "key": "PRIMARY", + "table_name": "lineitem", + "access_type": "ref", + "possible_keys": [ + "PRIMARY", + "i_l_shipdate", + "i_l_orderkey", + "i_l_orderkey_quantity" + ], + "key": "i_l_orderkey", "key_length": "4", - "used_key_parts": ["o_orderkey"], - "ref": ["dbt3_s001.lineitem.l_orderkey"], + "used_key_parts": ["l_orderkey"], + "ref": ["dbt3_s001.orders.o_orderkey"], "rowid_filter": { "range": { - "key": "i_o_totalprice", - "used_key_parts": ["o_totalprice"] + "key": "i_l_shipdate", + "used_key_parts": ["l_shipDATE"] }, - "rows": 69, - "selectivity_pct": 4.6, - "r_rows": 71, - "r_lookups": 96, - "r_selectivity_pct": 10.41666667, + "rows": 98, + "selectivity_pct": 1.631973356, + "r_rows": 98, + "r_lookups": 476, + "r_selectivity_pct": 2.31092437, "r_buffer_size": "REPLACED", "r_filling_time_ms": "REPLACED" }, - "r_loops": 98, - "rows": 1, - "r_rows": 0.112244898, + "r_loops": 71, + "rows": 4, + "r_rows": 0.154929577, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 4.599999905, + "filtered": 1.631973386, "r_filtered": 100, - "attached_condition": "orders.o_totalprice between 200000 and 230000" + "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" } } } diff --git a/mysql-test/main/select.result b/mysql-test/main/select.result index 99844d05f2a..abffe58739d 100644 --- a/mysql-test/main/select.result +++ b/mysql-test/main/select.result @@ -3744,7 +3744,7 @@ EXPLAIN SELECT * FROM t1 WHERE ID_better=1 AND ID1_with_null IS NULL AND (ID2_with_null=1 OR ID2_with_null=2); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref idx1,idx2 idx2 4 const 2 Using where +1 SIMPLE t1 ref|filter idx1,idx2 idx1|idx2 5|4 const 2 (1%) Using index condition; Using where; Using rowid filter DROP TABLE t1; CREATE TABLE t1 (a INT, ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, KEY ts(ts)); INSERT INTO t1 VALUES (30,"2006-01-03 23:00:00"), (31,"2006-01-03 23:00:00"); diff --git a/mysql-test/main/select_jcl6.result b/mysql-test/main/select_jcl6.result index a19e98d49a3..1a7eb796e96 100644 --- a/mysql-test/main/select_jcl6.result +++ b/mysql-test/main/select_jcl6.result @@ -3755,7 +3755,7 @@ EXPLAIN SELECT * FROM t1 WHERE ID_better=1 AND ID1_with_null IS NULL AND (ID2_with_null=1 OR ID2_with_null=2); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref idx1,idx2 idx2 4 const 2 Using where +1 SIMPLE t1 ref|filter idx1,idx2 idx1|idx2 5|4 const 2 (1%) Using index condition; Using where; Using rowid filter DROP TABLE t1; CREATE TABLE t1 (a INT, ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, KEY ts(ts)); INSERT INTO t1 VALUES (30,"2006-01-03 23:00:00"), (31,"2006-01-03 23:00:00"); diff --git a/mysql-test/main/select_pkeycache.result b/mysql-test/main/select_pkeycache.result index 99844d05f2a..abffe58739d 100644 --- a/mysql-test/main/select_pkeycache.result +++ b/mysql-test/main/select_pkeycache.result @@ -3744,7 +3744,7 @@ EXPLAIN SELECT * FROM t1 WHERE ID_better=1 AND ID1_with_null IS NULL AND (ID2_with_null=1 OR ID2_with_null=2); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ref idx1,idx2 idx2 4 const 2 Using where +1 SIMPLE t1 ref|filter idx1,idx2 idx1|idx2 5|4 const 2 (1%) Using index condition; Using where; Using rowid filter DROP TABLE t1; CREATE TABLE t1 (a INT, ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, KEY ts(ts)); INSERT INTO t1 VALUES (30,"2006-01-03 23:00:00"), (31,"2006-01-03 23:00:00"); diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index 1143a4deb46..b5a48d341ae 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -1856,7 +1856,7 @@ WHERE A.a=t1.a AND t2.b < 20); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where 2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 -2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where +2 DEPENDENT SUBQUERY t2 ref|filter a,b a|b 5|5 test.A.id 1 (10%) Using where; Using rowid filter EXPLAIN SELECT * FROM t1 A, t1 B WHERE A.a = B.a and A.id = 65; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE A const PRIMARY,a PRIMARY 4 const 1 @@ -1868,7 +1868,7 @@ WHERE A.a=t1.a AND t2.b < 20); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where 2 DEPENDENT SUBQUERY A ref PRIMARY,a a 5 test.t1.a 1 -2 DEPENDENT SUBQUERY t2 ref a,b a 5 test.A.id 1 Using where +2 DEPENDENT SUBQUERY t2 ref|filter a,b a|b 5|5 test.A.id 1 (10%) Using where; Using rowid filter set optimizer_switch= @save_optimizer_switch; set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity; drop table t1,t2; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index e0beda6ec61..dfd7d59143f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7903,7 +7903,6 @@ best_access_path(JOIN *join, rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables Json_writer_object trace_access_idx(thd); - double eq_ref_rows= 0.0, eq_ref_cost= 0.0; /* full text keys require special treatment */ @@ -7948,10 +7947,7 @@ best_access_path(JOIN *join, tmp= adjust_quick_cost(table->opt_range[key].cost, 1); else tmp= table->file->avg_io_cost(); - eq_ref_rows= prev_record_reads(join_positions, idx, - found_ref); - tmp*= eq_ref_rows; - eq_ref_cost= tmp; + tmp *= prev_record_reads(join_positions, idx, found_ref); records=1.0; } else @@ -8251,28 +8247,7 @@ best_access_path(JOIN *join, (table->file->index_flags(start_key->key,0,1) & HA_DO_RANGE_FILTER_PUSHDOWN)) { - double rows; - if (type == JT_EQ_REF) - { - /* - Treat EQ_REF access in a special way: - 1. We have no cost for index-only read. Assume its cost is 50% of - the cost of the full read. - - 2. A regular ref access will do #record_count lookups, but eq_ref - has "lookup cache" which reduces the number of lookups made. - The estimation code uses prev_record_reads() call to estimate: - - tmp = prev_record_reads(join_positions, idx, found_ref); - - Set the effective number of rows from "tmp" here. - */ - keyread_tmp= COST_ADD(eq_ref_cost / 2, s->startup_cost); - rows= eq_ref_rows; - } - else - rows= record_count * records; - + double rows= record_count * records; /* If we use filter F with selectivity s the the cost of fetching data by key using this filter will be @@ -8294,46 +8269,63 @@ best_access_path(JOIN *join, cost_of_fetching_1_row = tmp/rows cost_of_fetching_1_key_tuple = keyread_tmp/rows - access_cost_factor is the gain we expect for using rowid filter. - An access_cost_factor of 1.0 means that keyread_tmp is 0 - (using key read is infinitely fast) and the gain for each row when - using filter is great. - An access_cost_factor if 0.0 means that using keyread has the - same cost as reading rows, so there is no gain to get with - filter. - access_cost_factor should never be bigger than 1.0 (if all - calculations are correct) as the cost of keyread should always be - smaller than the cost of fetching the same number of keys + rows. - access_cost_factor should also never be smaller than 0.0. - The one exception is if number of records is 1 (eq_ref), then - because we are comparing rows to cost of keyread_tmp, keyread_tmp - is higher by 1.0. This is a big that will be fixed in a later - version. - - If we have limited the cost (=tmp) of reading rows with 'worst_seek' - we cannot use filters as the cost calculation below would cause - tmp to become negative. The future resultion is to not limit - cost with worst_seek. + Here's a more detailed explanation that uses the formulas behind + the function the call filter->get_adjusted_gain(). The function + takes as a parameter the number of probes/look-ups into the filter + that is equal to the number of fetched key entries that is equal to + the number of row fetches when no filter is used (assuming no + index condition pushdown is employed for the used key access). + Let this number be N. Then the total gain from using the filter is + N*a_adj - b where b is the cost of building the filter and + a_adj is calcilated as follows: + a - (1-access_cost_factor)*(1-s) = + (1+1_cond_eval_cost)*(1-s)-1_probe_cost - (1-access_cost_factor)*(1-s) + = (1-s)*(1_cond_eval_cost+access_cost_factor) - 1_probe_cost. + Here ((1-s)*(1_cond_eval_cost) * N is the gain from checking less + conditions pushed into the table, 1_probe_cost*N is the cost of the + probes and (1*s) * access_cost_factor * N must be the gain from + accessing less rows. + It does not matter how we calculate the cost of N full row fetches + cost_of_fetching_N_rows or + how we calculate the cost of fetching N key entries + cost_of_fetching_N_key_entries + the gain from less row fetches will be + (cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) * (1-s) + and this should be equal to (1*s) * access_cost_factor * N. + Thus access_cost_factor must be calculated as + (cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) / N. + + For safety we clip cost_of_fetching_N_key_entries by the value + of cost_of_fetching_N_row though formally it's not necessary. */ - double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0); + /* + For eq_ref access we assume that the cost of fetching N key entries + is equal to the half of fetching N rows + */ + double key_access_cost= + type == JT_EQ_REF ? 0.5 * tmp : MY_MIN(tmp, keyread_tmp); + double access_cost_factor= MY_MIN((tmp - key_access_cost) / rows, 1.0); + if (!(records < s->worst_seeks && records <= thd->variables.max_seeks_for_key)) + { + // Don't use rowid filter trace_access_idx.add("rowid_filter_skipped", "worst/max seeks clipping"); - else if (access_cost_factor <= 0.0) - trace_access_idx.add("rowid_filter_skipped", "cost_factor <= 0"); + filter= NULL; + } else { filter= table->best_range_rowid_filter_for_partial_join(start_key->key, rows, access_cost_factor); - if (filter) - { - tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows); - DBUG_ASSERT(tmp >= 0); - trace_access_idx.add("rowid_filter_key", + } + if (filter) + { + tmp-= filter->get_adjusted_gain(rows) - filter->get_cmp_gain(rows); + DBUG_ASSERT(tmp >= 0); + trace_access_idx.add("rowid_filter_key", table->key_info[filter->key_no].name); - } } } trace_access_idx.add("rows", records).add("cost", tmp); @@ -8506,27 +8498,23 @@ best_access_path(JOIN *join, if ( s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { double rows= record_count * s->found_records; - double access_cost_factor= MY_MIN(tmp / rows, 1.0); uint key_no= s->quick->index; /* See the comment concerning using rowid filter for with ref access */ - keyread_tmp= s->table->opt_range[key_no].index_only_cost * - record_count; - access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0); - if (access_cost_factor > 0.0) + double row_access_cost= s->quick->read_time * record_count; + double key_access_cost= + MY_MIN(row_access_cost, + s->table->opt_range[key_no].index_only_cost * record_count); + double access_cost_factor= MY_MIN((row_access_cost - key_access_cost) / + rows, 1.0); + filter= + s->table->best_range_rowid_filter_for_partial_join(key_no, rows, + access_cost_factor); + if (filter) { - filter= - s->table-> - best_range_rowid_filter_for_partial_join(key_no, rows, - access_cost_factor); - if (filter) - { - tmp-= filter->get_adjusted_gain(rows); - DBUG_ASSERT(tmp >= 0); - } + tmp-= filter->get_adjusted_gain(rows); + DBUG_ASSERT(tmp >= 0); } - else - trace_access_scan.add("rowid_filter_skipped", "cost_factor <= 0"); type= JT_RANGE; } |