From 42fea34d4a9f4ed1ae87cf8494f07bcdfcc49e83 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Mon, 20 Dec 2021 19:55:00 -0800 Subject: MDEV-27262 Unexpected index intersection with full index scan for an index If when extracting a range condition for an index from the WHERE condition Range Optimizer sees that the range condition covers the whole index then such condition should be discarded because it cannot be used in any range scan. In some cases Range Optimizer really does it, but there remained some conditions for which it was not done. As a result the optimizer could produce index merge plans with the full index scan for one of the indexes participating in the index merge. This could be observed in one of the test cases from index_merge1.inc where a plan with index_merge_sort_union was produced and in the test case reported for this bug where a plan with index_merge_sort_intersect was produced. In both cases one of two index scans participating in index merge ran over the whole index. The patch slightly changes the original above mentioned test case from index_merge1.inc to be able to produce an intended plan employing index_merge_sort_union. The original query was left to show that index merge is not used for it anymore. It should be noted that for the plan with index_merge_sort_intersect could be chosen for execution only due to a defect in the InnoDB code that returns wrong estimates for the cardinality of big ranges. This bug led to serious problems in 10.4+ where the optimization using Rowid filters is employed (see mdev-26446). Approved by Sergey Petrunia --- sql/opt_range.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'sql/opt_range.cc') diff --git a/sql/opt_range.cc b/sql/opt_range.cc index f3f184367c9..89ee8cf98e1 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -9329,7 +9329,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no); - for (key2=key2->first(); key2; ) + for (key2=key2->first(); ; ) { /* key1 consists of one or more ranges. tmp is the range currently @@ -9343,6 +9343,16 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) ^ tmp */ + if (key1->min_flag & NO_MIN_RANGE && + key1->max_flag & NO_MAX_RANGE) + { + if (key1->maybe_flag) + return new SEL_ARG(SEL_ARG::MAYBE_KEY); + return 0; // Always true OR + } + if (!key2) + break; + SEL_ARG *tmp=key1->find_range(key2); /* @@ -9413,6 +9423,13 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2) key2->copy_min(tmp); if (!(key1=key1->tree_delete(tmp))) { // Only one key in tree + if (key2->min_flag & NO_MIN_RANGE && + key2->max_flag & NO_MAX_RANGE) + { + if (key2->maybe_flag) + return new SEL_ARG(SEL_ARG::MAYBE_KEY); + return 0; // Always true OR + } key1=key2; key1->make_root(); key2=key2_next; -- cgit v1.2.1 From c18896f9c1ce6e4b9a8519a2d5155698d82ae45a Mon Sep 17 00:00:00 2001 From: Monty Date: Mon, 27 Dec 2021 18:51:00 +0200 Subject: MDEV-14907 FEDERATEDX doesn't respect DISTINCT Federated and Federatex cannot be used with ROR scans Federated::position() and Federatex::position() is storing in 'ref' a pointer into a local result set buffer. This means that one cannot compare 'ref' from different handler instances to see if they point to the same physical record. This bug caused federated.federatedx to return wrong results when the optimizer tried to use index_merge to resolve some queries. Fixed by introducing table flag HA_NON_COMPARABLE_ROWID and using this with the above handlers. Todo: - Fix multi_delete(), multi_update and read_records() to use primary key instead of 'ref' if case HA_NON_COMPARABLE_ROWID is set. The current code only works if we have only one range (like table scan) for the tables that will be updated in the second pass. - Enable DBUG_ASSERT() in ha_federated::cmp_ref() and ha_federatedx::cmp_ref(). --- sql/opt_range.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'sql/opt_range.cc') diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 5f13c9b3441..d3104019edb 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2678,6 +2678,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, records= head->stat_records(); if (!records) records++; /* purecov: inspected */ + if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + only_single_index_range_scan= 1; if (head->force_index || force_quick_range) scan_time= read_time= DBL_MAX; @@ -11312,6 +11314,9 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts) table_key->user_defined_key_parts); uint pk_number; + if (param->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID) + return false; + for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++) { uint16 fieldnr= param->table->key_info[keynr]. -- cgit v1.2.1 From fdec8852014960b33b302fc2467cc535eff78186 Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 19 Jan 2022 18:34:45 +0200 Subject: MDEV-25830 optimizer_use_condition_selectivity=4 sometimes produces worse plan than optimizer_use_condition_selectivity=1 The issue was that calc_cond_selectivity_for_table prefered ranges with many parts and when deciding on which selectivity to use. Fixed by going through ranges according to the number of rows in the range. This ensures that selectivity from ranges with few rows will be prefered over ranges with many rows for indexes that uses the same columns. --- sql/opt_range.cc | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'sql/opt_range.cc') diff --git a/sql/opt_range.cc b/sql/opt_range.cc index d3104019edb..7599401dcb4 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3249,6 +3249,25 @@ double records_in_column_ranges(PARAM *param, uint idx, } +/* + Compare quick select ranges according to number of found rows + If there is equal amounts of rows, use the long key part. + The idea is that if we have keys (a),(a,b) and (a,b,c) and we have + a query like WHERE a=1 and b=1 and c=1, + it is better to use key (a,b,c) than (a) as it will ensure we don't also + use histograms for columns b and c +*/ + +static +int cmp_quick_ranges(TABLE *table, uint *a, uint *b) +{ + int tmp= CMP_NUM(table->quick_rows[*a], table->quick_rows[*b]); + if (tmp) + return tmp; + return -CMP_NUM(table->quick_key_parts[*a], table->quick_key_parts[*b]); +} + + /* Calculate the selectivity of the condition imposed on the rows of a table @@ -3285,10 +3304,10 @@ double records_in_column_ranges(PARAM *param, uint idx, bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) { - uint keynr; - uint max_quick_key_parts= 0; + uint keynr, range_index, ranges; MY_BITMAP *used_fields= &table->cond_set; - double table_records= (double)table->stat_records(); + double table_records= (double)table->stat_records(); + uint optimal_key_order[MAX_KEY]; DBUG_ENTER("calculate_cond_selectivity_for_table"); table->cond_selectivity= 1.0; @@ -3327,23 +3346,21 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond) Json_writer_object trace_wrapper(thd); Json_writer_array selectivity_for_indexes(thd, "selectivity_for_indexes"); - for (keynr= 0; keynr < table->s->keys; keynr++) - { + /* + Walk through all quick ranges in the order of least found rows. + */ + for (ranges= keynr= 0 ; keynr < table->s->keys; keynr++) if (table->quick_keys.is_set(keynr)) - set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]); - } + optimal_key_order[ranges++]= keynr; - /* - Walk through all indexes, indexes where range access uses more keyparts - go first. - */ - for (uint quick_key_parts= max_quick_key_parts; - quick_key_parts; quick_key_parts--) + my_qsort2(optimal_key_order, ranges, + sizeof(optimal_key_order[0]), + (qsort2_cmp) cmp_quick_ranges, table); + + for (range_index= 0 ; range_index < ranges ; range_index++) { - for (keynr= 0; keynr < table->s->keys; keynr++) + uint keynr= optimal_key_order[range_index]; { - if (table->quick_keys.is_set(keynr) && - table->quick_key_parts[keynr] == quick_key_parts) { uint i; uint used_key_parts= table->quick_key_parts[keynr]; -- cgit v1.2.1