summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Widenius <monty@mariadb.org>2022-05-18 22:17:32 +0300
committerSergei Petrunia <sergey@mariadb.com>2022-06-07 20:43:11 +0300
commit432a4ebe5cd2ebf4d0fad79092e82e5d1a9f53ba (patch)
tree2a9bdd81c21f52f5fbb08080c8a43f54b1421888
parent64f24b776dfdb8bcc37cc9d5be022a8af28f76b0 (diff)
downloadmariadb-git-432a4ebe5cd2ebf4d0fad79092e82e5d1a9f53ba.tar.gz
Improve table pruning in optimizer with up to date key_dependent map
Part of: MDEV-28073 Slow query performance in MariaDB when using many tables s->key_dependent has a list of tables that are compared with key fields in the current table. However it does not take into account if a key field could be resolved by another table. This is because MariaDB expands 'join_tab->keyuse' to include all generated comparisons. For example: SELECT * from t1,t2,t3 where t1.key=t2.key and t2.key=t3.key In this case keyuse for t1 includes t2.key and t3.key and key_dependent contains 't2.map | t3.map' If we in best_extension_by_limited_search() consider t2,t1 then t1's key is fully defined, but we cannot do any prune of plans as s->key_dependent indicates that t3 is still needed. Fixed by calculating in best_access_patch the current key_dependent map of tables that is needed to satisfy all keys. This allows us to prune more bad plans earlier as soon as all keys can be used. We also set key_dependent to 0 if we found an EQ_REF key, as this an optimal key for the table and there is no reason to check more keys.
-rw-r--r--mysql-test/main/derived_cond_pushdown.result135
-rw-r--r--mysql-test/main/join_nested.result4
-rw-r--r--mysql-test/main/join_nested_jcl6.result4
-rw-r--r--mysql-test/main/selectivity_innodb.result4
-rw-r--r--sql/sql_select.cc67
-rw-r--r--sql/sql_select.h4
6 files changed, 135 insertions, 83 deletions
diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result
index d15f368e08d..5b0cc4a08f5 100644
--- a/mysql-test/main/derived_cond_pushdown.result
+++ b/mysql-test/main/derived_cond_pushdown.result
@@ -9684,11 +9684,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
- "table_name": "<derived2>",
+ "table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
- "attached_condition": "1 in (0,dt1.a)",
+ "attached_condition": "1 in (0,t1.a) and t1.a is not null"
+ },
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.a"],
+ "rows": 2,
+ "filtered": 100,
"materialized": {
"query_block": {
"select_id": 2,
@@ -9706,18 +9717,6 @@ EXPLAIN
}
}
}
- },
- "block-nl-join": {
- "table": {
- "table_name": "t1",
- "access_type": "ALL",
- "rows": 2,
- "filtered": 100
- },
- "buffer_type": "flat",
- "buffer_size": "65",
- "join_type": "BNL",
- "attached_condition": "t1.a = dt1.a"
}
}
}
@@ -9743,11 +9742,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
- "table_name": "<derived2>",
+ "table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
- "attached_condition": "dt.a in (1,dt.a)",
+ "attached_condition": "t1.a in (1,t1.a) and t1.a is not null"
+ },
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.a"],
+ "rows": 2,
+ "filtered": 100,
"materialized": {
"query_block": {
"select_id": 2,
@@ -9765,18 +9775,6 @@ EXPLAIN
}
}
}
- },
- "block-nl-join": {
- "table": {
- "table_name": "t1",
- "access_type": "ALL",
- "rows": 2,
- "filtered": 100
- },
- "buffer_type": "flat",
- "buffer_size": "119",
- "join_type": "BNL",
- "attached_condition": "t1.a = dt.a"
}
}
}
@@ -10376,11 +10374,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
- "table_name": "<derived3>",
+ "table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
- "attached_condition": "t.f2 < 2",
+ "attached_condition": "t1.f2 < 2 and t1.f2 is not null"
+ },
+ "table": {
+ "table_name": "<derived3>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["f2"],
+ "ref": ["test.t1.f2"],
+ "rows": 2,
+ "filtered": 100,
"materialized": {
"query_block": {
"select_id": 3,
@@ -10393,13 +10402,6 @@ EXPLAIN
}
}
}
- },
- "table": {
- "table_name": "t1",
- "access_type": "ALL",
- "rows": 2,
- "filtered": 100,
- "attached_condition": "t1.f2 = t.f2"
}
}
}
@@ -10417,11 +10419,22 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
- "table_name": "<derived3>",
+ "table_name": "t1",
"access_type": "ALL",
"rows": 2,
"filtered": 100,
- "attached_condition": "t.f2 < 2",
+ "attached_condition": "t1.f2 < 2 and t1.f2 is not null"
+ },
+ "table": {
+ "table_name": "<derived3>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["f2"],
+ "ref": ["test.t1.f2"],
+ "rows": 1,
+ "filtered": 100,
"materialized": {
"query_block": {
"select_id": 3,
@@ -10436,18 +10449,6 @@ EXPLAIN
}
}
}
- },
- "block-nl-join": {
- "table": {
- "table_name": "t1",
- "access_type": "ALL",
- "rows": 2,
- "filtered": 100
- },
- "buffer_type": "flat",
- "buffer_size": "65",
- "join_type": "BNL",
- "attached_condition": "t1.f2 = t.f2"
}
}
}
@@ -14388,8 +14389,8 @@ a b c a b c
3 21 500 3 21 231
explain select * from v1,t2 where (v1.b=t2.b) and (v1.a<4);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY <derived2> ALL NULL NULL NULL NULL 3 Using where
-1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where; Using join buffer (flat, BNL join)
+1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t2.b 2 Using where
2 DERIVED t3 range i1 i1 5 NULL 2 Using index condition
3 UNION t3 range i1 i1 5 NULL 1 Using index condition
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
@@ -14399,9 +14400,21 @@ EXPLAIN
"query_block": {
"select_id": 1,
"table": {
- "table_name": "<derived2>",
+ "table_name": "t2",
"access_type": "ALL",
- "rows": 3,
+ "rows": 9,
+ "filtered": 100,
+ "attached_condition": "t2.b is not null"
+ },
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "5",
+ "used_key_parts": ["b"],
+ "ref": ["test.t2.b"],
+ "rows": 2,
"filtered": 100,
"attached_condition": "v1.a < 4",
"materialized": {
@@ -14447,18 +14460,6 @@ EXPLAIN
}
}
}
- },
- "block-nl-join": {
- "table": {
- "table_name": "t2",
- "access_type": "ALL",
- "rows": 9,
- "filtered": 100
- },
- "buffer_type": "flat",
- "buffer_size": "173",
- "join_type": "BNL",
- "attached_condition": "t2.b = v1.b"
}
}
}
diff --git a/mysql-test/main/join_nested.result b/mysql-test/main/join_nested.result
index 0c7a1b48940..5f26b03e0d1 100644
--- a/mysql-test/main/join_nested.result
+++ b/mysql-test/main/join_nested.result
@@ -1476,9 +1476,9 @@ join t5 on t5.a=t3.b) on t3.a=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL X
1 SIMPLE t3 ref a a 5 test.t2.b X Using where
-1 SIMPLE t5 ref a a 5 test.t3.b X
-1 SIMPLE t4 ref a a 5 test.t5.a X Using where
+1 SIMPLE t4 ref a a 5 test.t3.b X Using where
1 SIMPLE t6 ref a a 5 test.t4.b X
+1 SIMPLE t5 ref a a 5 test.t3.b X
drop table t0, t1, t2, t3, t4, t5, t6, t7;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
diff --git a/mysql-test/main/join_nested_jcl6.result b/mysql-test/main/join_nested_jcl6.result
index 31f5c794071..5db6d030965 100644
--- a/mysql-test/main/join_nested_jcl6.result
+++ b/mysql-test/main/join_nested_jcl6.result
@@ -1485,9 +1485,9 @@ join t5 on t5.a=t3.b) on t3.a=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL X
1 SIMPLE t3 ref a a 5 test.t2.b X Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
-1 SIMPLE t5 ref a a 5 test.t3.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
-1 SIMPLE t4 ref a a 5 test.t5.a X Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
+1 SIMPLE t4 ref a a 5 test.t3.b X Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
1 SIMPLE t6 ref a a 5 test.t4.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
+1 SIMPLE t5 ref a a 5 test.t3.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
drop table t0, t1, t2, t3, t4, t5, t6, t7;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result
index 5457db21436..07cdf15163c 100644
--- a/mysql-test/main/selectivity_innodb.result
+++ b/mysql-test/main/selectivity_innodb.result
@@ -331,8 +331,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
-1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
+1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00
Warnings:
@@ -365,8 +365,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort
-1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00
+1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00
1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index
2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00
Warnings:
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 32f5e41e50d..12a1b74e1ec 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -392,6 +392,7 @@ POSITION::POSITION()
ref_depend_map= dups_producing_tables= 0;
inner_tables_handled_with_other_sjs= 0;
type= JT_UNKNOWN;
+ key_dependent= 0;
dups_weedout_picker.set_empty();
firstmatch_picker.set_empty();
loosescan_picker.set_empty();
@@ -6291,7 +6292,11 @@ add_key_field(JOIN *join,
Field IN ...
*/
if (field->flags & PART_KEY_FLAG)
- stat[0].key_dependent|=used_tables;
+ {
+ stat[0].key_dependent|= used_tables;
+ if (field->key_start.bits_set())
+ stat[0].key_start_dependent= 1;
+ }
bool is_const=1;
for (uint i=0; i<num_values; i++)
@@ -7242,6 +7247,7 @@ bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse,
use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
prev= &key_end;
found_eq_constant= 0;
+ /* Loop over all elements except the last 'key_end' */
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
{
if (!use->is_for_hash_join())
@@ -7699,6 +7705,13 @@ best_access_path(JOIN *join,
double best_time= DBL_MAX;
double records= DBL_MAX;
table_map best_ref_depends_map= 0;
+ /*
+ key_dependent is 0 if all key parts could be used or if there was an
+ EQ_REF table found (which uses all key parts). In other words, we cannot
+ find a better key for the table even if remaining_tables is reduced.
+ Otherwise it's a bitmap of tables that could improve key usage.
+ */
+ table_map key_dependent= 0;
Range_rowid_filter_cost_info *best_filter= 0;
double tmp;
ha_rows rec;
@@ -7750,6 +7763,8 @@ best_access_path(JOIN *join,
key_part_map const_part= 0;
/* The or-null keypart in ref-or-null access: */
key_part_map ref_or_null_part= 0;
+ key_part_map all_parts= 0;
+
if (is_hash_join_key_no(key))
{
/*
@@ -7781,15 +7796,16 @@ best_access_path(JOIN *join,
do /* For each keypart */
{
uint keypart= keyuse->keypart;
- table_map best_part_found_ref= 0;
+ table_map best_part_found_ref= 0, key_parts_dependent= 0;
double best_prev_record_reads= DBL_MAX;
-
+
do /* For each way to access the keypart */
{
/*
if 1. expression doesn't refer to forward tables
2. we won't get two ref-or-null's
*/
+ all_parts|= keyuse->keypart_map;
if (!(remaining_tables & keyuse->used_tables) &&
(!keyuse->validity_ref || *keyuse->validity_ref) &&
s->access_from_tables_is_allowed(keyuse->used_tables,
@@ -7798,6 +7814,7 @@ best_access_path(JOIN *join,
KEY_OPTIMIZE_REF_OR_NULL)))
{
found_part|= keyuse->keypart_map;
+ key_parts_dependent= 0;
if (!(keyuse->used_tables & ~join->const_table_map))
const_part|= keyuse->keypart_map;
@@ -7820,10 +7837,16 @@ best_access_path(JOIN *join,
if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
ref_or_null_part |= keyuse->keypart_map;
}
+ else if (!(found_part & keyuse->keypart_map))
+ key_parts_dependent|= keyuse->used_tables;
+
loose_scan_opt.add_keyuse(remaining_tables, keyuse);
keyuse++;
} while (keyuse->table == table && keyuse->key == key &&
keyuse->keypart == keypart);
+ /* If we found a usable key, remember the dependent tables */
+ if (all_parts & 1)
+ key_dependent|= key_parts_dependent;
found_ref|= best_part_found_ref;
} while (keyuse->table == table && keyuse->key == key);
@@ -8210,6 +8233,24 @@ best_access_path(JOIN *join,
} /* for each key */
records= best_records;
}
+ else
+ {
+ /*
+ No usable keys found. However, there may still be an option to use
+ "Range checked for each record" when all depending tables has
+ been read. s->key_dependent tells us which tables these could be and
+ s->key_start_dependent tells us if a first key part was used.
+ s->key_dependent may include more tables than could be used,
+ but this is ok as not having any usable keys is a rare thing and
+ the performance penalty for extra table bits is that
+ best_extension_by_limited_search() would not be able to prune tables
+ earlier.
+ Example query:
+ SELECT * FROM t1,t2 where t1.key1=t2.key1 OR t2.key2<1
+ */
+ if (s->key_start_dependent)
+ key_dependent= s->key_dependent;
+ }
/*
If there is no key to access the table, but there is an equi-join
@@ -8461,6 +8502,8 @@ best_access_path(JOIN *join,
pos->use_join_buffer= best_uses_jbuf;
pos->spl_plan= spl_plan;
pos->range_rowid_filter_info= best_filter;
+ pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 :
+ key_dependent & remaining_tables);
loose_scan_opt.save_to_position(s, loose_scan_pos);
@@ -9401,10 +9444,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
double sel= 1.0;
COND_EQUAL *cond_equal= join->cond_equal;
- if (!cond_equal || !cond_equal->current_level.elements)
- return sel;
-
- if (!s->keyuse)
+ if (!cond_equal || !cond_equal->current_level.elements || !s->keyuse)
return sel;
Item_equal *item_equal;
@@ -10028,11 +10068,18 @@ best_extension_by_limited_search(JOIN *join,
(idx == join->const_tables && // 's' is the first table in the QEP
s->table == join->sort_by_table))
{
+ /*
+ Store the current record count and cost as the best
+ possible cost at this level if the following holds:
+ - It's the lowest record number and cost so far
+ - There is no remaing table that could improve index usage
+ or we found an EQ_REF or REF key with less than 2
+ matching records (good enough).
+ */
if (best_record_count >= current_record_count &&
best_read_time >= current_read_time &&
- /* TODO: What is the reasoning behind this condition? */
- (!(s->key_dependent & allowed_tables & remaining_tables) ||
- join->positions[idx].records_read < 2.0))
+ (!(position->key_dependent & allowed_tables) ||
+ position->records_read < 2.0))
{
best_record_count= current_record_count;
best_read_time= current_read_time;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index e65267558e1..7a72d0efe42 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -376,6 +376,8 @@ typedef struct st_join_table {
uint used_null_fields;
uint used_uneven_bit_fields;
enum join_type type;
+ /* If first key part is used for any key in 'key_dependent' */
+ bool key_start_dependent;
bool cached_eq_ref_table,eq_ref_table;
bool shortcut_for_distinct;
bool sorted;
@@ -958,6 +960,8 @@ public:
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
+ /* tables that may help best_access_path() to find a better key */
+ table_map key_dependent;
/*
Bitmap of semi-join inner tables that are in the join prefix and for
which there's no provision for how to eliminate semi-join duplicates