diff options
author | Monty <monty@mariadb.org> | 2023-01-24 16:25:26 +0200 |
---|---|---|
committer | Monty <monty@mariadb.org> | 2023-01-25 14:06:59 +0200 |
commit | 5a515dcbda88ba06aba2fb021360595b4ba539e9 (patch) | |
tree | e2a135b443ae5dd9a97cece18c505556cb07d0e1 | |
parent | bcf5bf786b4f5ef0f58706c1f0c89542cd5552b0 (diff) | |
download | mariadb-git-5a515dcbda88ba06aba2fb021360595b4ba539e9.tar.gz |
Selectivity: apply found_constraint heuristic only to post-join #rows.
matching_candidates_in_table() computes the number of rows one
gets from the current table after applying the WHERE clause on
just this table
The function had a "found_counstraint heuristic" which reduced the
number of rows after WHERE check by 25% if there were comparisons
between key parts in table T and previous tables, like WHERE
T.keyXpartY= func(prev_table.cols)
Note that such comparisons can only be checked when the row of
table T is joined with rows of the previous tables. It is wrong
to apply the selectivity before the join operation.
Fixed by moving the 'found_constraint' code to a separate function
and only reducing the #rows in 'records_out'.
Renamed matching_candidates_in_table() to apply_selectivity_for_table() as
the function now either applies selectivity on the rows (depending
on the value of thd->variables.optimizer_use_condition_selectivity)
or uses the selectivity from the available range conditions.
-rw-r--r-- | mysql-test/main/opt_trace.result | 15 | ||||
-rw-r--r-- | mysql-test/main/subselect_sj.result | 12 | ||||
-rw-r--r-- | mysql-test/main/subselect_sj_jcl6.result | 12 | ||||
-rw-r--r-- | sql/sql_select.cc | 94 |
4 files changed, 73 insertions, 60 deletions
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index b6122d80fd3..bf9f66f59f4 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -12700,7 +12700,7 @@ EXPLAIN "loops": 3, "rows": 10000, "cost": 1.701731924, - "filtered": 4.307688236, + "filtered": 3.230766058, "attached_condition": "t1.b < 5000 and t1.c < 1000" }, "buffer_type": "flat", @@ -12963,7 +12963,7 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and "access_type": "scan_with_join_cache", "rows": 10000, "rows_after_filter": 430.7688, - "rows_out": 430.7688, + "rows_out": 323.0766, "cost": 1.701731924, "index_only": false, "chosen": true @@ -12972,7 +12972,7 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and "chosen_access_method": { "type": "scan", "rows_read": 430.7688, - "rows_out": 430.7688, + "rows_out": 323.0766, "cost": 1.701731924, "uses_join_buffering": true } @@ -12983,8 +12983,11 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and { "plan_prefix": "three", "table": "t1", - "rows_for_plan": 1292.3064, - "cost_for_plan": 1.712236739 + "rows_for_plan": 969.2298, + "cost_for_plan": 1.712236739, + "pushdown_cond_selectivity": 0.75, + "filtered": 3.230766, + "rows_out": 323.0766 } ] }, @@ -12999,7 +13002,7 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and }, { "best_join_order": ["three", "t1"], - "rows": 1292.3064, + "rows": 969.2298, "cost": 1.712236739 }, { diff --git a/mysql-test/main/subselect_sj.result b/mysql-test/main/subselect_sj.result index 3174b76e177..a33fb8cea6e 100644 --- a/mysql-test/main/subselect_sj.result +++ b/mysql-test/main/subselect_sj.result @@ -2436,9 +2436,9 @@ SET SESSION optimizer_switch='loosescan=off'; EXPLAIN SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where -1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index -1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1) +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 +1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index +1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1) SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); a 5 @@ -2446,9 +2446,9 @@ SET SESSION optimizer_switch='loosescan=on'; EXPLAIN SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where -1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index -1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1) +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 +1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index +1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1) SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); a 5 diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result index 79c05e49e31..9028b1c8aac 100644 --- a/mysql-test/main/subselect_sj_jcl6.result +++ b/mysql-test/main/subselect_sj_jcl6.result @@ -2442,9 +2442,9 @@ SET SESSION optimizer_switch='loosescan=off'; EXPLAIN SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where -1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index -1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1) +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 +1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index +1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1) SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); a 5 @@ -2452,9 +2452,9 @@ SET SESSION optimizer_switch='loosescan=on'; EXPLAIN SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where -1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index -1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1) +1 PRIMARY t1 ALL NULL NULL NULL NULL 4 +1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index +1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1) SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b); a 5 diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 5058074bc56..ed13ccf6a2f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7758,14 +7758,12 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) /* Estimate how many records we will get if we read just this table and apply - a part of WHERE that can be checked for it. + a part of WHERE that can be checked using only the current table and + const tables. @param s Current JOIN_TAB - @param with_found_constraint There is a filtering condition on the - current table. See more below. @param use_cond_selectivity Value of optimizer_use_condition_selectivity. If > 1 then use table->cond_selecitivity. - @return 0.0 No matching rows @return >= 1.0 Number of expected matching rows @@ -7773,28 +7771,15 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) Estimate how many records we will get if we - read the given table with its "independent" access method (either quick select or full table/index scan), - - apply the part of WHERE that refers only to this table. + - apply the part of WHERE that refers only to this table and const tables. - The result cannot be bigger than table records - 'with_found_constraint' is true if the WHERE has a top level expression of - type: - current_table.key_part=expression_that_contains_fields_from_previous_tables - Examples (assuming join order t1,t2): - WHERE t2.keypart1=t1.some_field - WHERE t2.keypart1=t1.some_field+t1.other_field - - TODO: - Extend with_found_constraint' to be set for a top level expression of type - X=Y where X and Y has fields from current table and at least one field from - one or more previous tables. - @see also table_after_join_selectivity() produces selectivity of condition that is checked after joining rows from this table to rows from preceding tables. */ -static double matching_candidates_in_table(JOIN_TAB *s, - bool with_found_constraint, +static double apply_selectivity_for_table(JOIN_TAB *s, uint use_cond_selectivity) { double dbl_records; @@ -7828,19 +7813,6 @@ static double matching_candidates_in_table(JOIN_TAB *s, dbl_records= rows2double(s->table->opt_range_condition_rows); } - /* - If there is a filtering condition on the table (i.e. ref analyzer found - at least one "table.keyXpartY= exprZ", where exprZ refers only to tables - preceding this table in the join order we're now considering), then - assume that 25% of the rows will be filtered out by this condition. - - This heuristic is supposed to force tables used in exprZ to be before - this table in join order. - */ - - if (with_found_constraint) - dbl_records-= dbl_records/4; - DBUG_ASSERT(dbl_records <= s->records); /* Ensure we return at least one row if there is any possibility to have @@ -7856,6 +7828,28 @@ static double matching_candidates_in_table(JOIN_TAB *s, /* + Take into account that the table's WHERE clause has conditions on earlier + tables that can reduce the number of accepted rows. + + @param records Number of original rows (after selectivity) + + If there is a filtering condition on the table (i.e. ref analyzer found + at least one "table.keyXpartY= exprZ", where exprZ refers only to tables + preceding this table in the join order we're now considering), then + assume that 25% of the rows will be filtered out by this condition. + + This heuristic is supposed to force tables used in exprZ to be before + this table in join order. +*/ + +static double use_found_constraint(double records) +{ + records-= records/4; + return records ? MY_MAX(records, MIN_ROWS_AFTER_FILTERING) : 0.0; +} + + +/* Calculate the cost of reading a set of rows trough an index @param eq_ref True if there is only one matching key (EQ_REF) @@ -8242,9 +8236,6 @@ best_access_path(JOIN *join, { found_part|= keyuse->keypart_map; key_parts_dependent= 0; - found_constraint|= (keyuse->used_tables & - ~(remaining_tables | - join->const_table_map)); if (!(keyuse->used_tables & ~join->const_table_map)) const_part|= keyuse->keypart_map; @@ -8267,6 +8258,16 @@ best_access_path(JOIN *join, */ if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) ref_or_null_part |= keyuse->keypart_map; + + /* + Remember if there is a WHERE condition that contains + 'key_part=expression_with_only_accessible_tables' + We ignore const tables as these are handled by selectivity + code (const table fields are treated as constants). + */ + found_constraint|= (keyuse->used_tables & + ~(remaining_tables | + join->const_table_map)); } else if (!(found_part & keyuse->keypart_map)) key_parts_dependent|= keyuse->used_tables; @@ -8795,12 +8796,15 @@ best_access_path(JOIN *join, (!(table->map & join->outer_join) || join->allowed_outer_join_with_cache)) // (2) { - double refills, row_copy_cost, cmp_time, cur_cost; + double refills, row_copy_cost, cmp_time, cur_cost, records_table_filter; /* Estimate the cost of the hash join access to the table */ - double rnd_records= matching_candidates_in_table(s, 0, - use_cond_selectivity); + double rnd_records= apply_selectivity_for_table(s, use_cond_selectivity); + records_table_filter= ((found_constraint) ? + use_found_constraint(rnd_records) : + rnd_records); + DBUG_ASSERT(rnd_records <= s->found_records); - set_if_smaller(best.records_out, rnd_records); + set_if_smaller(best.records_out, records_table_filter); /* The following cost calculation is identical to the cost calculation for @@ -9010,13 +9014,19 @@ best_access_path(JOIN *join, } else { + double records_table_filter; + /* We will now calculate cost of scan, with or without join buffer */ records_best_filter= records_after_filter= - matching_candidates_in_table(s, 0, use_cond_selectivity); + apply_selectivity_for_table(s, use_cond_selectivity); + records_table_filter= ((found_constraint) ? + use_found_constraint(records_after_filter) : + records_after_filter); + DBUG_ASSERT(records_after_filter <= s->records); DBUG_ASSERT(records_after_filter <= s->found_records); - set_if_smaller(best.records_out, records_after_filter); + set_if_smaller(best.records_out, records_table_filter); org_records= rows2double(s->records); @@ -10337,7 +10347,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, with previous tables. For quick selects and full table scans, selectivity of COND(this_table) - is accounted for in matching_candidates_in_table(). Here, we only count + is accounted for in apply_selectivity_for_table(). Here, we only count selectivity of COND(this_table, previous_tables). For other access methods, we need to calculate selectivity of the whole |