summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Petrunia <sergey@mariadb.com>2022-06-06 22:21:22 +0300
committerSergei Petrunia <sergey@mariadb.com>2022-06-06 22:21:22 +0300
commit161e8c5b7132212d5884865e67c50657854f1b63 (patch)
tree56e87d77ade61d92d8246556434f99cea7a0ab5b
parent2f8d0af8837392e7116f624729860615a33d2484 (diff)
downloadmariadb-git-bb-10.6-mdev28749.tar.gz
MDEV-28749: restore_prev_nj_state() doesn't update cur_sj_inner_tables correctlybb-10.6-mdev28749
Errors: 1. restore_prev_sj_state() assumed that "we assume remaining_tables doesnt contain @tab" which wasn't true. 2. Another bug in this function: it did remove bits from join->cur_sj_inner_tables but never added them. 3. greedy_search() adds tables into the join prefix but neglects to update the semi-join optimization state. It does update nested outer join state, see this call: check_interleaving_with_nj(best_table) but there's no matching call to update the semi-join state. (This wasn't visible because most of the state is in the POSITION structure which is updated. But there is also state in JOIN, too) The patch: - Fixes all of the above - Adds JOIN::dbug_verify_sj_inner_tables() which is used to verify the state is correct at every step. - Renamed advance_sj_state() to optimize_semi_joins(). = Introduced update_sj_state() which ideally should be called "advance_sj_state" but I didn't reuse the name to avoid confusion.
-rw-r--r--mysql-test/main/opt_trace.result16
-rw-r--r--mysql-test/main/subselect_sj.result6
-rw-r--r--mysql-test/main/subselect_sj_jcl6.result8
-rw-r--r--sql/opt_subselect.cc104
-rw-r--r--sql/opt_subselect.h8
-rw-r--r--sql/sql_select.cc18
-rw-r--r--sql/sql_select.h8
7 files changed, 125 insertions, 43 deletions
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index 5504f4da81e..f0ba5ad199c 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -5495,6 +5495,11 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
"cost_for_plan": 451.8615234,
"semijoin_strategy_choice": [
{
+ "strategy": "FirstMatch",
+ "records": 27,
+ "read_time": 665.225293
+ },
+ {
"strategy": "DuplicateWeedout",
"records": 27,
"read_time": 565.2615234
@@ -5670,16 +5675,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) {
},
"rows_for_plan": 2187,
"cost_for_plan": 611.8461426,
- "semijoin_strategy_choice": [
- {
- "strategy": "FirstMatch",
- "records": 81,
- "read_time": 2232.809033
- },
- {
- "chosen_strategy": "FirstMatch"
- }
- ],
+ "semijoin_strategy_choice": [],
"pruned_by_cost": true
},
{
diff --git a/mysql-test/main/subselect_sj.result b/mysql-test/main/subselect_sj.result
index 9fd8186b66c..e9a484bbcbf 100644
--- a/mysql-test/main/subselect_sj.result
+++ b/mysql-test/main/subselect_sj.result
@@ -2178,10 +2178,10 @@ INSERT INTO t5 VALUES (7,0),(9,0);
explain
SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary
+1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan
1 PRIMARY t4 ALL NULL NULL NULL NULL 3
-1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where
-1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (flat, BNL join)
+1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5)
+1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join)
SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b);
a
0
diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result
index e97c1d5e915..c485b5e5f39 100644
--- a/mysql-test/main/subselect_sj_jcl6.result
+++ b/mysql-test/main/subselect_sj_jcl6.result
@@ -2189,10 +2189,10 @@ INSERT INTO t5 VALUES (7,0),(9,0);
explain
SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary
-1 PRIMARY t4 ALL NULL NULL NULL NULL 3 Using join buffer (flat, BNL join)
-1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where; Using join buffer (incremental, BNL join)
-1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (incremental, BNL join)
+1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan
+1 PRIMARY t4 ALL NULL NULL NULL NULL 3
+1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5)
+1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join)
SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b);
a
0
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 88b1428f2b1..cb451c02a8b 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -180,7 +180,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- optimize_semijoin_nests() does pre-optimization
- during join optimization, the join has one JOIN_TAB (or is it POSITION?)
- array, and suffix-based detection is used, see advance_sj_state()
+ array, and suffix-based detection is used, see optimize_semi_joins()
- after join optimization is done, get_best_combination() switches
the data-structure to prefix-based, multiple JOIN_TAB ranges format.
@@ -2761,7 +2761,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
Do semi-join optimization step after we've added a new tab to join prefix
SYNOPSIS
- advance_sj_state()
+ optimize_semi_joins()
join The join we're optimizing
remaining_tables Tables not in the join prefix
new_join_tab Join tab we've just added to the join prefix
@@ -2821,9 +2821,9 @@ bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map in
}
-void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
- double *current_record_count, double *current_read_time,
- POSITION *loose_scan_pos)
+void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx,
+ double *current_record_count,
+ double *current_read_time, POSITION *loose_scan_pos)
{
POSITION *pos= join->positions + idx;
const JOIN_TAB *new_join_tab= pos->table;
@@ -3014,19 +3014,36 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
}
}
- if ((emb_sj_nest= new_join_tab->emb_sj_nest))
+ update_sj_state(join, new_join_tab, idx, remaining_tables);
+
+ pos->prefix_cost.convert_from_cost(*current_read_time);
+ pos->prefix_record_count= *current_record_count;
+ pos->dups_producing_tables= dups_producing_tables;
+}
+
+
+/*
+ Update JOIN's semi-join optimization state after the join tab new_tab
+ has been added into the join prefix.
+
+ @seealso restore_prev_sj_state() does the reverse actoion
+*/
+
+void update_sj_state(JOIN *join, const JOIN_TAB *new_tab,
+ uint idx, table_map remaining_tables)
+{
+ if (TABLE_LIST *emb_sj_nest= new_tab->emb_sj_nest)
{
join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
/* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */
if (!(remaining_tables &
- emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map))
+ emb_sj_nest->sj_inner_tables & ~new_tab->table->map))
join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
}
-
- pos->prefix_cost.convert_from_cost(*current_read_time);
- pos->prefix_record_count= *current_record_count;
- pos->dups_producing_tables= dups_producing_tables;
+#ifndef DBUG_OFF
+ join->dbug_verify_sj_inner_tables(idx + 1);
+#endif
}
@@ -3579,10 +3596,46 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
return FALSE;
}
+#ifndef DBUG_OFF
+/*
+ Verify the value of JOIN::cur_sj_inner_tables by recomputing it
+*/
+bool JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const
+{
+ table_map cur_map= const_table_map;
+ table_map nests_entered= 0;
+ if (emb_sjm_nest)
+ {
+ DBUG_ASSERT(cur_sj_inner_tables == 0);
+ return (cur_sj_inner_tables == 0);
+ }
+
+ for (uint i= const_tables; i < prefix_size; i++)
+ {
+ JOIN_TAB *tab= positions[i].table;
+ cur_map |= tab->table->map;
+ if (TABLE_LIST *sj_nest= tab->emb_sj_nest)
+ {
+ nests_entered |= sj_nest->sj_inner_tables;
+ if (!(sj_nest->sj_inner_tables & ~cur_map))
+ {
+ // all nest tables are in the prefix already
+ nests_entered &= ~sj_nest->sj_inner_tables;
+ }
+ }
+ }
+ DBUG_ASSERT(nests_entered == cur_sj_inner_tables);
+ return (nests_entered == cur_sj_inner_tables);
+}
+#endif
/*
Remove the last join tab from from join->cur_sj_inner_tables bitmap
- we assume remaining_tables doesnt contain @tab.
+
+ @note
+ remaining_tables contains @tab.
+
+ @seealso update_sj_state() does the reverse
*/
void restore_prev_sj_state(const table_map remaining_tables,
@@ -3596,15 +3649,30 @@ void restore_prev_sj_state(const table_map remaining_tables,
tab->join->sjm_lookup_tables &= ~subq_tables;
}
- if ((emb_sj_nest= tab->emb_sj_nest))
+ if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest))
{
+ table_map subq_tables= emb_sj_nest->sj_inner_tables &
+ ~tab->join->const_table_map;
/* If we're removing the last SJ-inner table, remove the sj-nest */
- if ((remaining_tables & emb_sj_nest->sj_inner_tables) ==
- (emb_sj_nest->sj_inner_tables & ~tab->table->map))
+ if ((remaining_tables & subq_tables) == subq_tables)
{
+ // All non-const tables of the SJ nest are in the remaining_tables.
+ // we are not in the nest anymore.
tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
}
+ else
+ {
+ // Semi-join nest has:
+ // - a table being removed (not in the prefix)
+ // - some tables in the prefix.
+ tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables;
+ }
}
+
+#ifndef DBUG_OFF
+ /* positions[idx] has been removed. Verify the state for [0...idx-1] */
+ tab->join->dbug_verify_sj_inner_tables(idx);
+#endif
}
@@ -3831,8 +3899,8 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN;
join->best_positions[first].n_sj_tables= sjm->tables;
/*
- Do what advance_sj_state did: re-run best_access_path for every table
- in the [last_inner_table + 1; pos..) range
+ Do what optimize_semi_joins did: re-run best_access_path for every
+ table in the [last_inner_table + 1; pos..) range
*/
double prefix_rec_count;
/* Get the prefix record count */
@@ -5086,7 +5154,7 @@ int setup_semijoin_loosescan(JOIN *join)
The choice between the strategies is made by the join optimizer (see
- advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()).
+ optimize_semi_joins() and fix_semijoin_strategies_for_picked_join_order()).
This function sets up all fields/structures/etc needed for execution except
for setup/initialization of semi-join materialization which is done in
setup_sj_materialization() (todo: can't we move that to here also?)
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
index abd37f1e98e..7b1b810ee81 100644
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@@ -314,9 +314,11 @@ public:
};
-void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx,
- double *current_record_count, double *current_read_time,
- POSITION *loose_scan_pos);
+void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx,
+ double *current_record_count,
+ double *current_read_time, POSITION *loose_scan_pos);
+void update_sj_state(JOIN *join, const JOIN_TAB *new_tab,
+ uint idx, table_map remaining_tables);
void restore_prev_sj_state(const table_map remaining_tables,
const JOIN_TAB *tab, uint idx);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index fa33f0e0500..ff5b0c5cae6 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8663,6 +8663,10 @@ choose_plan(JOIN *join, table_map join_tables)
{
choose_initial_table_order(join);
}
+ /*
+ Note: constant tables are already in the join prefix. We don't
+ put them into the cur_sj_inner_tables, though.
+ */
join->cur_sj_inner_tables= 0;
if (straight_join)
@@ -8976,8 +8980,8 @@ optimize_straight_join(JOIN *join, table_map join_tables)
read_time+= COST_ADD(read_time - filter_cmp_gain,
COST_ADD(position->read_time,
record_count / TIME_FOR_COMPARE));
- advance_sj_state(join, join_tables, idx, &record_count, &read_time,
- &loose_scan_pos);
+ optimize_semi_joins(join, join_tables, idx, &record_count, &read_time,
+ &loose_scan_pos);
join_tables&= ~(s->table->map);
double pushdown_cond_selectivity= 1.0;
@@ -9157,6 +9161,12 @@ greedy_search(JOIN *join,
/* This has been already checked by best_extension_by_limited_search */
DBUG_ASSERT(!is_interleave_error);
+ /*
+ Also, update the semi-join optimization state. Information about the
+ picked semi-join operation is in best_pos->...picker, but we need to
+ update the global state in the JOIN object, too.
+ */
+ update_sj_state(join, best_table, idx, remaining_tables);
/* find the position of 'best_table' in 'join->best_ref' */
best_idx= idx;
@@ -9981,8 +9991,8 @@ best_extension_by_limited_search(JOIN *join,
trace_one_table.add("rows_for_plan", current_record_count);
trace_one_table.add("cost_for_plan", current_read_time);
}
- advance_sj_state(join, remaining_tables, idx, &current_record_count,
- &current_read_time, &loose_scan_pos);
+ optimize_semi_joins(join, remaining_tables, idx, &current_record_count,
+ &current_read_time, &loose_scan_pos);
/* Expand only partial plans with lower cost than the best QEP so far */
if (current_read_time >= join->best_read)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4a2929207a5..17c4ee63bc6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1307,9 +1307,15 @@ public:
Bitmap of inner tables of semi-join nests that have a proper subset of
their tables in the current join prefix. That is, of those semi-join
nests that have their tables both in and outside of the join prefix.
+ (Note: tables that are constants but have not been pulled out of semi-join
+ nests are not considered part of semi-join nests)
*/
table_map cur_sj_inner_tables;
-
+
+#ifndef DBUG_OFF
+ bool dbug_verify_sj_inner_tables(uint n_positions) const;
+#endif
+
/* We also maintain a stack of join optimization states in * join->positions[] */
/******* Join optimization state members end *******/