diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2021-02-03 22:11:59 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2021-02-06 11:06:48 +0530 |
commit | f91641b4fa56ddddfb250b2cffbc464cd544e95f (patch) | |
tree | 5f1fd7261d8f7966871400e1077ea2cfadf52c45 | |
parent | 245fa8cd6ea8ff762abff7691691ac37cba75ce1 (diff) | |
download | mariadb-git-f91641b4fa56ddddfb250b2cffbc464cd544e95f.tar.gz |
MDEV-24740: Selectivity for equi-join predicates not involed in ref access is not taken into account for join cardinality estimation
Introduced a new function to evaluate the selectivity for equi-join predicates
which are not part of ref access.
Enabled this calculation only when one can use the ORDER BY LIMIT optimization.
-rw-r--r-- | sql/sql_select.cc | 157 | ||||
-rw-r--r-- | sql/sql_statistics.cc | 55 | ||||
-rw-r--r-- | sql/sql_statistics.h | 2 |
3 files changed, 207 insertions, 7 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index b59d652c175..896277da6cc 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -9327,7 +9327,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, Field *fld= fi.get_curr_field(); if (!(fld->table->map & ~(table_bit | rem_tables))) continue; - curr_eq_fld_sel= get_column_avg_frequency(fld) / + curr_eq_fld_sel= get_column_avg_frequency_via_stat_tables(fld) / fld->table->stat_records(); if (curr_eq_fld_sel < 1.0) set_if_bigger(eq_fld_sel, curr_eq_fld_sel); @@ -9341,6 +9341,157 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, /** @brief + Get the selectivity of equalities between columns when joining a table + + @param join The optimized join + @param idx The number of tables in the evaluated partual join + @param s The table to be joined for evaluation + @param rem_tables The bitmap of tables to be joined later + @param keyparts The number of key parts to used when joining s + @param ref_keyuse_steps Array of references to keyuses employed to join s + + @note this function is only used for the ORDER BY LIMIT optimization +*/ + +static +double multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, + table_map rem_tables, uint keyparts, + uint16 *ref_keyuse_steps) +{ + double sel= 1.0; + COND_EQUAL *cond_equal= join->cond_equal; + if (!cond_equal || !cond_equal->current_level.elements) + return sel; + + Item_equal *item_equal; + List_iterator_fast<Item_equal> it(cond_equal->current_level); + TABLE *table= s->table; + table_map table_bit= table->map; + POSITION *pos= &join->positions[idx]; + while ((item_equal= it++)) + { + /* + Check whether we need to take into account the selectivity of + multiple equality item_equal. If this is the case multiply + the current value of sel by this selectivity + */ + table_map used_tables= item_equal->used_tables(); + /* + The multiple equality int consideration should have a field of the + current table that is being considered for join evaluation. + Let us take an example here, the query is like + SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t1.b=t3.b AND t2.c=t3.c + And lets consider our current join prefix is: + t1, t2 + so the equalities that we can consider for table t2 are: + t1.a= t2.a (1) + t2.c= t3.c (2) + The other requisite for calculate of selectivity of equi-join conditions + is that for candidate equalities (that is (1) AND (2)) here, there + should be at least a field from the preceding tables of the current + partial join prefix. + So in this case the equality we will take into account would be (1) + */ + if (!(used_tables & table_bit)) + continue; + /* + If the multiple equality involves a constant, then the selectivity for + all the predicates is already taken into account in + table_cond_selectivity + */ + if (item_equal->get_const()) + continue; + bool adjust_sel= FALSE; + Item_equal_fields_iterator fi(*item_equal); + while((fi++) && !adjust_sel) + { + Field *fld= fi.get_curr_field(); + if (fld->table->map != table_bit) + continue; + if (pos->key == 0) + adjust_sel= TRUE; + else + { + uint i; + KEYUSE *keyuse= pos->key; + uint key= keyuse->key; + for (i= 0; i < keyparts; i++) + { + if (i > 0) + keyuse+= ref_keyuse_steps[i-1]; + uint fldno; + if (is_hash_join_key_no(key)) + fldno= keyuse->keypart; + else + fldno= table->key_info[key].key_part[i].fieldnr - 1; + if (fld->field_index == fldno) + break; + } + keyuse= pos->key; + if (i == keyparts) + { + /* + Field fld is included in multiple equality item_equal + and is not a part of the ref key. + The selectivity of the multiple equality must be taken + into account unless one of the ref arguments is + equal to fld. + */ + adjust_sel= TRUE; + for (uint j= 0; j < keyparts && adjust_sel; j++) + { + if (j > 0) + keyuse+= ref_keyuse_steps[j-1]; + Item *ref_item= keyuse->val; + if (ref_item->real_item()->type() == Item::FIELD_ITEM) + { + Item_field *field_item= (Item_field *) (ref_item->real_item()); + if (item_equal->contains(field_item->field)) + adjust_sel= FALSE; + } + } + } + } + } + if (adjust_sel) + { + /* + If ref == 0 and there are no fields in the multiple equality + item_equal that belong to the tables joined prior to s + then the selectivity of multiple equality will be set to 1.0. + */ + double eq_fld_sel= 1.0; + fi.rewind(); + + /* + The formula used to calculate the selectivity for an equi-join + predicate(col1=col2) is: + selectivity(col1=col2) = 1/max(ndv(col1), ndv(col2)); + */ + while ((fi++)) + { + double curr_eq_fld_sel; + Field *fld= fi.get_curr_field(); + /* + Consider field in the multiple equality that depend only on + preceding tables in the partial join prefix + */ + if (!(fld->table->map & ~(table_bit | rem_tables))) + continue; + curr_eq_fld_sel= get_column_avg_frequency(fld) / + fld->table->stat_records(); + if (curr_eq_fld_sel < 1.0) + set_if_smaller(eq_fld_sel, curr_eq_fld_sel); + } + sel*= eq_fld_sel; + } + } + return sel; +} + + +/** + @brief Get the selectivity of conditions when joining a table @param join The optimized join @@ -9566,6 +9717,10 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, } } + if (join->sort_nest_possible) + sel*= multi_eq_cond_selectivity(join, idx, s, rem_tables, + keyparts, ref_keyuse_steps); + sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, keyparts, ref_keyuse_steps); diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index df33e2c43ca..d1b5a9ab39e 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3672,7 +3672,7 @@ void set_statistics_for_table(THD *thd, TABLE *table) /** @brief - Get the average frequency for a column + Get the average frequency for a column via stat tables @param field The column whose average frequency is required @@ -3681,7 +3681,7 @@ void set_statistics_for_table(THD *thd, TABLE *table) The required average frequency */ -double get_column_avg_frequency(Field * field) +double get_column_avg_frequency_via_stat_tables(Field * field) { double res; TABLE *table= field->table; @@ -3699,12 +3699,55 @@ double get_column_avg_frequency(Field * field) Column_statistics *col_stats= field->read_stats; - if (!col_stats) - res= (double)table->stat_records(); - else + if (field->is_eits_usable()) res= col_stats->get_avg_frequency(); + else + res= (double)table->stat_records(); return res; -} +} + +/** + @brief + Get the average frequency for a column via indexes + + @param + field The column whose average frequency is required + + @retval + The required average frequency +*/ + +double get_column_avg_frequency_via_indexes(Field * field) +{ + uint key; + key_map::Iterator it(field->key_start); + double avg_freq= 0; + while ((key= it++) != key_map::Iterator::BITMAP_END) + { + DBUG_ASSERT(field->table); + KEY *keyinfo= field->table->key_info + key; + if ((avg_freq= keyinfo->actual_rec_per_key(0))) + return avg_freq; + } + return avg_freq; +} + + +/** + @brief + Get the average frequency for a column + + @param + field The column whose average frequency is required + + @retval + The required average frequency +*/ +double get_column_avg_frequency(Field * field) +{ + return get_column_avg_frequency_via_indexes(field) || + get_column_avg_frequency_via_stat_tables(field); +} /** diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 34471fd5270..31494dd271c 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -131,6 +131,8 @@ int rename_column_in_stat_tables(THD *thd, TABLE *tab, Field *col, const char *new_name); void set_statistics_for_table(THD *thd, TABLE *table); +double get_column_avg_frequency_via_stat_tables(Field * field); +double get_column_avg_frequency_via_indexes(Field * field); double get_column_avg_frequency(Field * field); double get_column_range_cardinality(Field *field, |