diff options
author | Sergei Petrunia <sergey@mariadb.com> | 2022-06-06 22:21:22 +0300 |
---|---|---|
committer | Sergei Petrunia <sergey@mariadb.com> | 2022-06-06 22:21:22 +0300 |
commit | 161e8c5b7132212d5884865e67c50657854f1b63 (patch) | |
tree | 56e87d77ade61d92d8246556434f99cea7a0ab5b | |
parent | 2f8d0af8837392e7116f624729860615a33d2484 (diff) | |
download | mariadb-git-bb-10.6-mdev28749.tar.gz |
MDEV-28749: restore_prev_nj_state() doesn't update cur_sj_inner_tables correctlybb-10.6-mdev28749
Errors:
1. restore_prev_sj_state() assumed that
"we assume remaining_tables doesnt contain @tab"
which wasn't true.
2. Another bug in this function: it did remove bits from
join->cur_sj_inner_tables but never added them.
3. greedy_search() adds tables into the join prefix but
neglects to update the semi-join optimization state.
It does update nested outer join state, see this call:
check_interleaving_with_nj(best_table)
but there's no matching call to update the semi-join state.
(This wasn't visible because most of the state is in the POSITION
structure which is updated. But there is also state in JOIN, too)
The patch:
- Fixes all of the above
- Adds JOIN::dbug_verify_sj_inner_tables() which is used to verify the
state is correct at every step.
- Renamed advance_sj_state() to optimize_semi_joins().
= Introduced update_sj_state() which ideally should be called
"advance_sj_state" but I didn't reuse the name to avoid confusion.
-rw-r--r-- | mysql-test/main/opt_trace.result | 16 | ||||
-rw-r--r-- | mysql-test/main/subselect_sj.result | 6 | ||||
-rw-r--r-- | mysql-test/main/subselect_sj_jcl6.result | 8 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 104 | ||||
-rw-r--r-- | sql/opt_subselect.h | 8 | ||||
-rw-r--r-- | sql/sql_select.cc | 18 | ||||
-rw-r--r-- | sql/sql_select.h | 8 |
7 files changed, 125 insertions, 43 deletions
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 5504f4da81e..f0ba5ad199c 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -5495,6 +5495,11 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "cost_for_plan": 451.8615234, "semijoin_strategy_choice": [ { + "strategy": "FirstMatch", + "records": 27, + "read_time": 665.225293 + }, + { "strategy": "DuplicateWeedout", "records": 27, "read_time": 565.2615234 @@ -5670,16 +5675,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { }, "rows_for_plan": 2187, "cost_for_plan": 611.8461426, - "semijoin_strategy_choice": [ - { - "strategy": "FirstMatch", - "records": 81, - "read_time": 2232.809033 - }, - { - "chosen_strategy": "FirstMatch" - } - ], + "semijoin_strategy_choice": [], "pruned_by_cost": true }, { diff --git a/mysql-test/main/subselect_sj.result b/mysql-test/main/subselect_sj.result index 9fd8186b66c..e9a484bbcbf 100644 --- a/mysql-test/main/subselect_sj.result +++ b/mysql-test/main/subselect_sj.result @@ -2178,10 +2178,10 @@ INSERT INTO t5 VALUES (7,0),(9,0); explain SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary +1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan 1 PRIMARY t4 ALL NULL NULL NULL NULL 3 -1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where -1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (flat, BNL join) +1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5) +1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join) SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); a 0 diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result index e97c1d5e915..c485b5e5f39 100644 --- a/mysql-test/main/subselect_sj_jcl6.result +++ b/mysql-test/main/subselect_sj_jcl6.result @@ -2189,10 +2189,10 @@ INSERT INTO t5 VALUES (7,0),(9,0); explain SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary -1 PRIMARY t4 ALL NULL NULL NULL NULL 3 Using join buffer (flat, BNL join) -1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where; Using join buffer (incremental, BNL join) -1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (incremental, BNL join) +1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan +1 PRIMARY t4 ALL NULL NULL NULL NULL 3 +1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5) +1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join) SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); a 0 diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 88b1428f2b1..cb451c02a8b 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -180,7 +180,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - optimize_semijoin_nests() does pre-optimization - during join optimization, the join has one JOIN_TAB (or is it POSITION?) - array, and suffix-based detection is used, see advance_sj_state() + array, and suffix-based detection is used, see optimize_semi_joins() - after join optimization is done, get_best_combination() switches the data-structure to prefix-based, multiple JOIN_TAB ranges format. @@ -2761,7 +2761,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) Do semi-join optimization step after we've added a new tab to join prefix SYNOPSIS - advance_sj_state() + optimize_semi_joins() join The join we're optimizing remaining_tables Tables not in the join prefix new_join_tab Join tab we've just added to the join prefix @@ -2821,9 +2821,9 @@ bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map in } -void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos) +void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, + double *current_read_time, POSITION *loose_scan_pos) { POSITION *pos= join->positions + idx; const JOIN_TAB *new_join_tab= pos->table; @@ -3014,19 +3014,36 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, } } - if ((emb_sj_nest= new_join_tab->emb_sj_nest)) + update_sj_state(join, new_join_tab, idx, remaining_tables); + + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; + pos->dups_producing_tables= dups_producing_tables; +} + + +/* + Update JOIN's semi-join optimization state after the join tab new_tab + has been added into the join prefix. + + @seealso restore_prev_sj_state() does the reverse actoion +*/ + +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables) +{ + if (TABLE_LIST *emb_sj_nest= new_tab->emb_sj_nest) { join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ if (!(remaining_tables & - emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) + emb_sj_nest->sj_inner_tables & ~new_tab->table->map)) join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - - pos->prefix_cost.convert_from_cost(*current_read_time); - pos->prefix_record_count= *current_record_count; - pos->dups_producing_tables= dups_producing_tables; +#ifndef DBUG_OFF + join->dbug_verify_sj_inner_tables(idx + 1); +#endif } @@ -3579,10 +3596,46 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, return FALSE; } +#ifndef DBUG_OFF +/* + Verify the value of JOIN::cur_sj_inner_tables by recomputing it +*/ +bool JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const +{ + table_map cur_map= const_table_map; + table_map nests_entered= 0; + if (emb_sjm_nest) + { + DBUG_ASSERT(cur_sj_inner_tables == 0); + return (cur_sj_inner_tables == 0); + } + + for (uint i= const_tables; i < prefix_size; i++) + { + JOIN_TAB *tab= positions[i].table; + cur_map |= tab->table->map; + if (TABLE_LIST *sj_nest= tab->emb_sj_nest) + { + nests_entered |= sj_nest->sj_inner_tables; + if (!(sj_nest->sj_inner_tables & ~cur_map)) + { + // all nest tables are in the prefix already + nests_entered &= ~sj_nest->sj_inner_tables; + } + } + } + DBUG_ASSERT(nests_entered == cur_sj_inner_tables); + return (nests_entered == cur_sj_inner_tables); +} +#endif /* Remove the last join tab from from join->cur_sj_inner_tables bitmap - we assume remaining_tables doesnt contain @tab. + + @note + remaining_tables contains @tab. + + @seealso update_sj_state() does the reverse */ void restore_prev_sj_state(const table_map remaining_tables, @@ -3596,15 +3649,30 @@ void restore_prev_sj_state(const table_map remaining_tables, tab->join->sjm_lookup_tables &= ~subq_tables; } - if ((emb_sj_nest= tab->emb_sj_nest)) + if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest)) { + table_map subq_tables= emb_sj_nest->sj_inner_tables & + ~tab->join->const_table_map; /* If we're removing the last SJ-inner table, remove the sj-nest */ - if ((remaining_tables & emb_sj_nest->sj_inner_tables) == - (emb_sj_nest->sj_inner_tables & ~tab->table->map)) + if ((remaining_tables & subq_tables) == subq_tables) { + // All non-const tables of the SJ nest are in the remaining_tables. + // we are not in the nest anymore. tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } + else + { + // Semi-join nest has: + // - a table being removed (not in the prefix) + // - some tables in the prefix. + tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + } } + +#ifndef DBUG_OFF + /* positions[idx] has been removed. Verify the state for [0...idx-1] */ + tab->join->dbug_verify_sj_inner_tables(idx); +#endif } @@ -3831,8 +3899,8 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; join->best_positions[first].n_sj_tables= sjm->tables; /* - Do what advance_sj_state did: re-run best_access_path for every table - in the [last_inner_table + 1; pos..) range + Do what optimize_semi_joins did: re-run best_access_path for every + table in the [last_inner_table + 1; pos..) range */ double prefix_rec_count; /* Get the prefix record count */ @@ -5086,7 +5154,7 @@ int setup_semijoin_loosescan(JOIN *join) The choice between the strategies is made by the join optimizer (see - advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()). + optimize_semi_joins() and fix_semijoin_strategies_for_picked_join_order()). This function sets up all fields/structures/etc needed for execution except for setup/initialization of semi-join materialization which is done in setup_sj_materialization() (todo: can't we move that to here also?) diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index abd37f1e98e..7b1b810ee81 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -314,9 +314,11 @@ public: }; -void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos); +void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, + double *current_read_time, POSITION *loose_scan_pos); +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables); void restore_prev_sj_state(const table_map remaining_tables, const JOIN_TAB *tab, uint idx); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index fa33f0e0500..ff5b0c5cae6 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8663,6 +8663,10 @@ choose_plan(JOIN *join, table_map join_tables) { choose_initial_table_order(join); } + /* + Note: constant tables are already in the join prefix. We don't + put them into the cur_sj_inner_tables, though. + */ join->cur_sj_inner_tables= 0; if (straight_join) @@ -8976,8 +8980,8 @@ optimize_straight_join(JOIN *join, table_map join_tables) read_time+= COST_ADD(read_time - filter_cmp_gain, COST_ADD(position->read_time, record_count / TIME_FOR_COMPARE)); - advance_sj_state(join, join_tables, idx, &record_count, &read_time, - &loose_scan_pos); + optimize_semi_joins(join, join_tables, idx, &record_count, &read_time, + &loose_scan_pos); join_tables&= ~(s->table->map); double pushdown_cond_selectivity= 1.0; @@ -9157,6 +9161,12 @@ greedy_search(JOIN *join, /* This has been already checked by best_extension_by_limited_search */ DBUG_ASSERT(!is_interleave_error); + /* + Also, update the semi-join optimization state. Information about the + picked semi-join operation is in best_pos->...picker, but we need to + update the global state in the JOIN object, too. + */ + update_sj_state(join, best_table, idx, remaining_tables); /* find the position of 'best_table' in 'join->best_ref' */ best_idx= idx; @@ -9981,8 +9991,8 @@ best_extension_by_limited_search(JOIN *join, trace_one_table.add("rows_for_plan", current_record_count); trace_one_table.add("cost_for_plan", current_read_time); } - advance_sj_state(join, remaining_tables, idx, ¤t_record_count, - ¤t_read_time, &loose_scan_pos); + optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count, + ¤t_read_time, &loose_scan_pos); /* Expand only partial plans with lower cost than the best QEP so far */ if (current_read_time >= join->best_read) diff --git a/sql/sql_select.h b/sql/sql_select.h index 4a2929207a5..17c4ee63bc6 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1307,9 +1307,15 @@ public: Bitmap of inner tables of semi-join nests that have a proper subset of their tables in the current join prefix. That is, of those semi-join nests that have their tables both in and outside of the join prefix. + (Note: tables that are constants but have not been pulled out of semi-join + nests are not considered part of semi-join nests) */ table_map cur_sj_inner_tables; - + +#ifndef DBUG_OFF + bool dbug_verify_sj_inner_tables(uint n_positions) const; +#endif + /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ |