summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMonty <monty@mariadb.org>2023-01-24 16:25:26 +0200
committerMonty <monty@mariadb.org>2023-01-25 14:06:59 +0200
commit5a515dcbda88ba06aba2fb021360595b4ba539e9 (patch)
treee2a135b443ae5dd9a97cece18c505556cb07d0e1
parentbcf5bf786b4f5ef0f58706c1f0c89542cd5552b0 (diff)
downloadmariadb-git-5a515dcbda88ba06aba2fb021360595b4ba539e9.tar.gz
Selectivity: apply found_constraint heuristic only to post-join #rows.
matching_candidates_in_table() computes the number of rows one gets from the current table after applying the WHERE clause on just this table The function had a "found_counstraint heuristic" which reduced the number of rows after WHERE check by 25% if there were comparisons between key parts in table T and previous tables, like WHERE T.keyXpartY= func(prev_table.cols) Note that such comparisons can only be checked when the row of table T is joined with rows of the previous tables. It is wrong to apply the selectivity before the join operation. Fixed by moving the 'found_constraint' code to a separate function and only reducing the #rows in 'records_out'. Renamed matching_candidates_in_table() to apply_selectivity_for_table() as the function now either applies selectivity on the rows (depending on the value of thd->variables.optimizer_use_condition_selectivity) or uses the selectivity from the available range conditions.
-rw-r--r--mysql-test/main/opt_trace.result15
-rw-r--r--mysql-test/main/subselect_sj.result12
-rw-r--r--mysql-test/main/subselect_sj_jcl6.result12
-rw-r--r--sql/sql_select.cc94
4 files changed, 73 insertions, 60 deletions
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index b6122d80fd3..bf9f66f59f4 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -12700,7 +12700,7 @@ EXPLAIN
"loops": 3,
"rows": 10000,
"cost": 1.701731924,
- "filtered": 4.307688236,
+ "filtered": 3.230766058,
"attached_condition": "t1.b < 5000 and t1.c < 1000"
},
"buffer_type": "flat",
@@ -12963,7 +12963,7 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and
"access_type": "scan_with_join_cache",
"rows": 10000,
"rows_after_filter": 430.7688,
- "rows_out": 430.7688,
+ "rows_out": 323.0766,
"cost": 1.701731924,
"index_only": false,
"chosen": true
@@ -12972,7 +12972,7 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and
"chosen_access_method": {
"type": "scan",
"rows_read": 430.7688,
- "rows_out": 430.7688,
+ "rows_out": 323.0766,
"cost": 1.701731924,
"uses_join_buffering": true
}
@@ -12983,8 +12983,11 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and
{
"plan_prefix": "three",
"table": "t1",
- "rows_for_plan": 1292.3064,
- "cost_for_plan": 1.712236739
+ "rows_for_plan": 969.2298,
+ "cost_for_plan": 1.712236739,
+ "pushdown_cond_selectivity": 0.75,
+ "filtered": 3.230766,
+ "rows_out": 323.0766
}
]
},
@@ -12999,7 +13002,7 @@ explain format=json select * from three, t1 where t1.a=three.a and t1.b<5000 and
},
{
"best_join_order": ["three", "t1"],
- "rows": 1292.3064,
+ "rows": 969.2298,
"cost": 1.712236739
},
{
diff --git a/mysql-test/main/subselect_sj.result b/mysql-test/main/subselect_sj.result
index 3174b76e177..a33fb8cea6e 100644
--- a/mysql-test/main/subselect_sj.result
+++ b/mysql-test/main/subselect_sj.result
@@ -2436,9 +2436,9 @@ SET SESSION optimizer_switch='loosescan=off';
EXPLAIN
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
-1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index
-1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1)
+1 PRIMARY t1 ALL NULL NULL NULL NULL 4
+1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index
+1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1)
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
a
5
@@ -2446,9 +2446,9 @@ SET SESSION optimizer_switch='loosescan=on';
EXPLAIN
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
-1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index
-1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1)
+1 PRIMARY t1 ALL NULL NULL NULL NULL 4
+1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index
+1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1)
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
a
5
diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result
index 79c05e49e31..9028b1c8aac 100644
--- a/mysql-test/main/subselect_sj_jcl6.result
+++ b/mysql-test/main/subselect_sj_jcl6.result
@@ -2442,9 +2442,9 @@ SET SESSION optimizer_switch='loosescan=off';
EXPLAIN
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
-1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index
-1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1)
+1 PRIMARY t1 ALL NULL NULL NULL NULL 4
+1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index
+1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1)
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
a
5
@@ -2452,9 +2452,9 @@ SET SESSION optimizer_switch='loosescan=on';
EXPLAIN
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where
-1 PRIMARY t3 range idx idx 4 NULL 2 Using where; Using index
-1 PRIMARY t2 ref idx idx 9 test.t3.b,test.t1.a 1 Using index; FirstMatch(t1)
+1 PRIMARY t1 ALL NULL NULL NULL NULL 4
+1 PRIMARY t2 range idx idx 4 NULL 2 Using where; Using index
+1 PRIMARY t3 ref idx idx 4 test.t2.b 1 Using index; FirstMatch(t1)
SELECT * FROM t1 WHERE a IN (SELECT t2.a FROM t2,t3 WHERE t2.b = t3.b);
a
5
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 5058074bc56..ed13ccf6a2f 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7758,14 +7758,12 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
/*
Estimate how many records we will get if we read just this table and apply
- a part of WHERE that can be checked for it.
+ a part of WHERE that can be checked using only the current table and
+ const tables.
@param s Current JOIN_TAB
- @param with_found_constraint There is a filtering condition on the
- current table. See more below.
@param use_cond_selectivity Value of optimizer_use_condition_selectivity.
If > 1 then use table->cond_selecitivity.
-
@return 0.0 No matching rows
@return >= 1.0 Number of expected matching rows
@@ -7773,28 +7771,15 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
Estimate how many records we will get if we
- read the given table with its "independent" access method (either quick
select or full table/index scan),
- - apply the part of WHERE that refers only to this table.
+ - apply the part of WHERE that refers only to this table and const tables.
- The result cannot be bigger than table records
- 'with_found_constraint' is true if the WHERE has a top level expression of
- type:
- current_table.key_part=expression_that_contains_fields_from_previous_tables
- Examples (assuming join order t1,t2):
- WHERE t2.keypart1=t1.some_field
- WHERE t2.keypart1=t1.some_field+t1.other_field
-
- TODO:
- Extend with_found_constraint' to be set for a top level expression of type
- X=Y where X and Y has fields from current table and at least one field from
- one or more previous tables.
-
@see also
table_after_join_selectivity() produces selectivity of condition that is
checked after joining rows from this table to rows from preceding tables.
*/
-static double matching_candidates_in_table(JOIN_TAB *s,
- bool with_found_constraint,
+static double apply_selectivity_for_table(JOIN_TAB *s,
uint use_cond_selectivity)
{
double dbl_records;
@@ -7828,19 +7813,6 @@ static double matching_candidates_in_table(JOIN_TAB *s,
dbl_records= rows2double(s->table->opt_range_condition_rows);
}
- /*
- If there is a filtering condition on the table (i.e. ref analyzer found
- at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
- preceding this table in the join order we're now considering), then
- assume that 25% of the rows will be filtered out by this condition.
-
- This heuristic is supposed to force tables used in exprZ to be before
- this table in join order.
- */
-
- if (with_found_constraint)
- dbl_records-= dbl_records/4;
-
DBUG_ASSERT(dbl_records <= s->records);
/*
Ensure we return at least one row if there is any possibility to have
@@ -7856,6 +7828,28 @@ static double matching_candidates_in_table(JOIN_TAB *s,
/*
+ Take into account that the table's WHERE clause has conditions on earlier
+ tables that can reduce the number of accepted rows.
+
+ @param records Number of original rows (after selectivity)
+
+ If there is a filtering condition on the table (i.e. ref analyzer found
+ at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
+ preceding this table in the join order we're now considering), then
+ assume that 25% of the rows will be filtered out by this condition.
+
+ This heuristic is supposed to force tables used in exprZ to be before
+ this table in join order.
+*/
+
+static double use_found_constraint(double records)
+{
+ records-= records/4;
+ return records ? MY_MAX(records, MIN_ROWS_AFTER_FILTERING) : 0.0;
+}
+
+
+/*
Calculate the cost of reading a set of rows trough an index
@param eq_ref True if there is only one matching key (EQ_REF)
@@ -8242,9 +8236,6 @@ best_access_path(JOIN *join,
{
found_part|= keyuse->keypart_map;
key_parts_dependent= 0;
- found_constraint|= (keyuse->used_tables &
- ~(remaining_tables |
- join->const_table_map));
if (!(keyuse->used_tables & ~join->const_table_map))
const_part|= keyuse->keypart_map;
@@ -8267,6 +8258,16 @@ best_access_path(JOIN *join,
*/
if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
ref_or_null_part |= keyuse->keypart_map;
+
+ /*
+ Remember if there is a WHERE condition that contains
+ 'key_part=expression_with_only_accessible_tables'
+ We ignore const tables as these are handled by selectivity
+ code (const table fields are treated as constants).
+ */
+ found_constraint|= (keyuse->used_tables &
+ ~(remaining_tables |
+ join->const_table_map));
}
else if (!(found_part & keyuse->keypart_map))
key_parts_dependent|= keyuse->used_tables;
@@ -8795,12 +8796,15 @@ best_access_path(JOIN *join,
(!(table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2)
{
- double refills, row_copy_cost, cmp_time, cur_cost;
+ double refills, row_copy_cost, cmp_time, cur_cost, records_table_filter;
/* Estimate the cost of the hash join access to the table */
- double rnd_records= matching_candidates_in_table(s, 0,
- use_cond_selectivity);
+ double rnd_records= apply_selectivity_for_table(s, use_cond_selectivity);
+ records_table_filter= ((found_constraint) ?
+ use_found_constraint(rnd_records) :
+ rnd_records);
+
DBUG_ASSERT(rnd_records <= s->found_records);
- set_if_smaller(best.records_out, rnd_records);
+ set_if_smaller(best.records_out, records_table_filter);
/*
The following cost calculation is identical to the cost calculation for
@@ -9010,13 +9014,19 @@ best_access_path(JOIN *join,
}
else
{
+ double records_table_filter;
+
/* We will now calculate cost of scan, with or without join buffer */
records_best_filter= records_after_filter=
- matching_candidates_in_table(s, 0, use_cond_selectivity);
+ apply_selectivity_for_table(s, use_cond_selectivity);
+ records_table_filter= ((found_constraint) ?
+ use_found_constraint(records_after_filter) :
+ records_after_filter);
+
DBUG_ASSERT(records_after_filter <= s->records);
DBUG_ASSERT(records_after_filter <= s->found_records);
- set_if_smaller(best.records_out, records_after_filter);
+ set_if_smaller(best.records_out, records_table_filter);
org_records= rows2double(s->records);
@@ -10337,7 +10347,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
with previous tables.
For quick selects and full table scans, selectivity of COND(this_table)
- is accounted for in matching_candidates_in_table(). Here, we only count
+ is accounted for in apply_selectivity_for_table(). Here, we only count
selectivity of COND(this_table, previous_tables).
For other access methods, we need to calculate selectivity of the whole