From 58cd0bd59ef011be54f162237f2ff017c3148e7b Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 17 Oct 2022 16:44:10 -0700 Subject: MDEV-28846 Poor performance when rowid filter contains no elements When a range rowid filter was used with an index ref access the cost of accessing the index entries for the records rejected by the filter was not taken into account. For a ref access by an index with big average number of records per key this led to poor execution plans if selectivity of the used filter was high. The patch resolves this problem. It also introduces a minor optimization that skips look-ups into a filter that turns out to be empty. With this patch the output of ANALYZE stmt reports the number of look-ups into used rowid filters. The patch also back-ports from 10.5 the code that properly sets the field TABLE::file::table for opened temporary tables. The test cases that were supposed to use rowid filters have been adjusted in order to use similar execution plans after this fix. Approved by Oleksandr Byelkin --- sql/sql_select.cc | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) (limited to 'sql/sql_select.cc') diff --git a/sql/sql_select.cc b/sql/sql_select.cc index a91b4571b21..5ec88e5259c 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7400,6 +7400,7 @@ best_access_path(JOIN *join, table_map best_ref_depends_map= 0; Range_rowid_filter_cost_info *best_filter= 0; double tmp; + double keyread_tmp= 0; ha_rows rec; bool best_uses_jbuf= FALSE; MY_BITMAP *eq_join_set= &s->table->eq_join_set; @@ -7666,11 +7667,16 @@ best_access_path(JOIN *join, tmp= records; set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); if (table->covering_keys.is_set(key)) - tmp= table->file->keyread_time(key, 1, (ha_rows) tmp); + keyread_tmp= + tmp= table->file->keyread_time(key, 1, (ha_rows) tmp); else + { + keyread_tmp= table->file->keyread_time(key, 1, (ha_rows) tmp); tmp= table->file->read_time(key, 1, (ha_rows) MY_MIN(tmp,s->worst_seeks)); + } tmp= COST_MULT(tmp, record_count); + keyread_tmp= COST_MULT(keyread_tmp, record_count); } } else @@ -7847,11 +7853,16 @@ best_access_path(JOIN *join, /* Limit the number of matched rows */ set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); if (table->covering_keys.is_set(key)) - tmp= table->file->keyread_time(key, 1, (ha_rows) tmp); + keyread_tmp= + tmp= table->file->keyread_time(key, 1, (ha_rows) tmp); else + { + keyread_tmp= table->file->keyread_time(key, 1, (ha_rows) tmp); tmp= table->file->read_time(key, 1, (ha_rows) MY_MIN(tmp,s->worst_seeks)); + } tmp= COST_MULT(tmp, record_count); + keyread_tmp= COST_MULT(keyread_tmp, record_count); } else { @@ -7870,7 +7881,35 @@ best_access_path(JOIN *join, (found_part & 1)) // start_key->key can be used for index access { double rows= record_count * records; - double access_cost_factor= MY_MIN(tmp / rows, 1.0); + + /* + If we use filter F with selectivity s the the cost of fetching data + by key using this filter will be + cost_of_fetching_1_row * rows * s + + cost_of_fetching_1_key_tuple * rows * (1 - s) + + cost_of_1_lookup_into_filter * rows + Without using any filter the cost would be just + cost_of_fetching_1_row * rows + + So the gain in access cost per row will be + cost_of_fetching_1_row * (1 - s) - + cost_of_fetching_1_key_tuple * (1 - s) - + cost_of_1_lookup_into_filter + = + (cost_of_fetching_1_row - cost_of_fetching_1_key_tuple) * (1 - s) + - cost_of_1_lookup_into_filter + + Here we have: + cost_of_fetching_1_row = tmp/rows + cost_of_fetching_1_key_tuple = keyread_tmp/rows + + Note that access_cost_factor may be greater than 1.0. In this case + we still can expect a gain of using rowid filter due to smaller number + of checks for conditions pushed to the joined table. + */ + double rows_access_cost= MY_MIN(rows, s->worst_seeks); + double access_cost_factor= MY_MIN((rows_access_cost - keyread_tmp) / + rows, 1.0); filter= table->best_range_rowid_filter_for_partial_join(start_key->key, rows, access_cost_factor); @@ -8029,8 +8068,11 @@ best_access_path(JOIN *join, if ( s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { double rows= record_count * s->found_records; - double access_cost_factor= MY_MIN(tmp / rows, 1.0); uint key_no= s->quick->index; + + /* See the comment concerning using rowid filter for with ref access */ + keyread_tmp= s->table->quick_index_only_costs[key_no]; + double access_cost_factor= MY_MIN((rows - keyread_tmp) / rows, 1.0); filter= s->table->best_range_rowid_filter_for_partial_join(key_no, rows, access_cost_factor); @@ -18810,6 +18852,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List &fields, delete table->file; goto err; } + table->file->set_table(table); if (!using_unique_constraint) reclength+= group_null_items; // null flag is stored separately @@ -20651,6 +20694,8 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) DBUG_RETURN(NESTED_LOOP_ERROR); join_tab->build_range_rowid_filter_if_needed(); + if (join_tab->rowid_filter && join_tab->rowid_filter->is_empty()) + rc= NESTED_LOOP_NO_MORE_ROWS; join->return_tab= join_tab; -- cgit v1.2.1 From 3303748fd13399ba39ce4d646153d086c5a09445 Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Wed, 2 Nov 2022 12:49:24 +0100 Subject: MDEV-29926: ASAN heap-use-after-free in Explain_query::~Explain_query Make sure that EXPLAIN object allocated on runtime arena. --- sql/sql_select.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'sql/sql_select.cc') diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 70c0a80ba2a..0b330528452 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1078,6 +1078,15 @@ JOIN::prepare(TABLE_LIST *tables_init, // simple check that we got usable conds dbug_print_item(conds); + /* + It is hack which force creating EXPLAIN object always on runt-time arena + (because very top JOIN::prepare executes always with runtime arena, but + constant subquery like (SELECT 'x') can be called with statement arena + during prepare phase of top SELECT). + */ + if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE)) + create_explain_query_if_not_exists(thd->lex, thd->mem_root); + if (select_lex->handle_derived(thd->lex, DT_PREPARE)) DBUG_RETURN(-1); @@ -1521,7 +1530,6 @@ bool JOIN::build_explain() int JOIN::optimize() { int res= 0; - create_explain_query_if_not_exists(thd->lex, thd->mem_root); join_optimization_state init_state= optimization_state; if (optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE) res= optimize_stage2(); -- cgit v1.2.1