diff options
author | Monty <monty@mariadb.org> | 2022-09-25 18:44:48 +0300 |
---|---|---|
committer | Sergei Petrunia <sergey@mariadb.com> | 2023-02-02 23:55:08 +0300 |
commit | 7afa819f727144e8a107e28444e07d54045ab38e (patch) | |
tree | 80c795559127f609e6fc9806cc234dcc01cbcf4a /sql/opt_range.cc | |
parent | 009db2288ba8d22c5e8e5e047d8f8694f7097923 (diff) | |
download | mariadb-git-7afa819f727144e8a107e28444e07d54045ab38e.tar.gz |
Fix cost calculation for get_best_group_min_max()
If the final range restrictions (SEL_ARG tree) over GROUP BY
columns are single-point, we can compute the number of GROUP BY groups.
Example: in the query:
SELECT ... FROM tbl
WHERE keypart1 IN (1,2,3) and keypart2 IN ('foo','bar')
Other things:
- Fixed cost calculation to more correctly count the number of blocks
that may be read. The old code could use the total blocks in the file
even if a range was available.
Diffstat (limited to 'sql/opt_range.cc')
-rw-r--r-- | sql/opt_range.cc | 189 |
1 files changed, 129 insertions, 60 deletions
diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 78e577c31f2..b2e109a5a72 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2174,6 +2174,27 @@ int SEL_ARG::sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag, } +/* + Check if min and values are equal + + @return 1 if equal +*/ + +bool SEL_ARG::min_max_are_equal() const +{ + uint offset= 0; + if (field->real_maybe_null()) // If null is part of key + { + if (*min_value != *max_value) + return 0; + if (*min_value) + return 1; // NULL where equal + offset= 1; // Skip NULL marker + } + return field->key_cmp(min_value+offset, max_value+offset) == 0; +} + + SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param) { SEL_ARG tmp_link,*next_arg,*root; @@ -11040,6 +11061,50 @@ uint SEL_ARG::get_max_key_part() const } +/** + Compute the number of eq_ranges top elements in the tree + + This is used by the cost_group_min_max() to calculate the number of + groups in SEL_TREE + + @param group_key_parts number of key parts that must be equal + + @return < 0 Not known + @return >= 0 Number of groups +*/ + +int SEL_ARG::number_of_eq_groups(uint group_key_parts) const +{ + int elements= 0; + SEL_ARG const *cur; + + if (part > group_key_parts-1 || type != KEY_RANGE) + return -1; + + cur= first(); + do + { + if ((cur->min_flag | cur->min_flag) & + (NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | NEAR_MAX | GEOM_FLAG)) + return -1; + if (min_value != max_value && !min_max_are_equal()) + return -1; + if (part != group_key_parts -1) + { + int tmp; + if (!next_key_part) + return -1; + if ((tmp= next_key_part->number_of_eq_groups(group_key_parts)) < 0) + return -1; + elements+= tmp; + } + else + elements++; + } while ((cur= cur->next)); + return elements; +} + + /* Remove the SEL_ARG graph elements which have part > max_part. @@ -11092,8 +11157,8 @@ void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part) @return tree pointer The tree after processing, - NULL If it was not possible to reduce the weight of the tree below the - limit. + NULL If it was not possible to reduce the weight of the tree below + the limit. */ SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno, @@ -14950,7 +15015,7 @@ get_field_keypart(KEY *index, Field *field) have_min [in] True if there is a MIN function have_max [in] True if there is a MAX function read_cost [out] The cost to retrieve rows via this quick select - records [out] The number of rows retrieved + out_records [out] The number of rows retrieved DESCRIPTION This method computes the access cost of a TRP_GROUP_MIN_MAX instance and @@ -14968,6 +15033,20 @@ get_field_keypart(KEY *index, Field *field) either scan the index for the next value or do a new index dive with 'find next bigger key'. + When using MIN() and MAX() in the query, the calls to the storage engine + are as follows for each group: + Assuming kp1 in ('abc','def','ghi)' and kp2 between 1000 and 2000 + + read_key('abc', HA_READ_KEY_OR_NEXT) + In case of MIN() we do: + read_key('abc,:'1000', HA_READ_KEY_OR_NEXT) + In case of MAX() we do + read_key('abc,:'2000', HA_READ_PREFIX_LAST_OR_PREV) + In the following code we will assume that the MIN key will be in + the same block as the first key read. + (We should try to optimize away the extra call for MAX() at some + point). + NOTES See get_best_group_min_max() for which kind of queries this function will be called. @@ -15013,36 +15092,27 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, uint group_key_parts, SEL_TREE *range_tree, SEL_ARG *index_tree, ha_rows quick_prefix_records, bool have_min, bool have_max, - double *read_cost, ha_rows *records) + double *read_cost, ha_rows *out_records) { - uint keys_per_block, key_length; - ha_rows table_records; + uint key_length; + ha_rows records; ha_rows num_groups; ha_rows num_blocks; ha_rows keys_per_group; - ha_rows keys_per_subgroup; /* Average number of keys in sub-groups */ - /* formed by a key infix. */ - double p_overlap; /* Probability that a sub-group overlaps two blocks. */ double quick_prefix_selectivity; - double io_cost; + ulonglong io_cost; handler *file= table->file; DBUG_ENTER("cost_group_min_max"); /* Same code as in handler::key_read_time() */ - table_records= table->stat_records(); + records= table->stat_records(); key_length= (index_info->key_length + file->ref_length); - num_blocks= (table_records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV * - INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1; - keys_per_block= (file->stats.block_size / - (key_length * INDEX_BLOCK_FILL_FACTOR_MUL / - INDEX_BLOCK_FILL_FACTOR_DIV) + - 1); /* Compute the number of keys in a group. */ if (!group_key_parts) { /* Summary over the whole table */ - keys_per_group= MY_MAX(table_records,1); + keys_per_group= MY_MAX(records,1); } else { @@ -15050,62 +15120,61 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, 1); if (keys_per_group == 0) /* If there is no statistics try to guess */ { - /* each group contains 10% of all records */ - keys_per_group= (table_records / 10) + 1; + /* each group contains 1% of all records */ + keys_per_group= (records / 100) + 1; } } if (keys_per_group > 1) - num_groups= (table_records / keys_per_group) + 1; + num_groups= (records / keys_per_group) + 1; else - num_groups= table_records; + num_groups= records; /* Apply the selectivity of the quick select for group prefixes. */ if (range_tree && (quick_prefix_records != HA_POS_ERROR)) { + int groups; quick_prefix_selectivity= (double) quick_prefix_records / - (double) table_records; + (double) records; num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity); + records= quick_prefix_records; + /* - Expect at least as many groups as there is ranges in the index + Try to handle cases like WHERE a in (1,2,3) GROUP BY a - This is mostly relevant for queries with few records, which is - something we have a lot of in our test suites. - In theory it is possible to scan the index_tree and for cases - where all ranges are eq ranges, we could calculate the exact number - of groups. This is probably an overkill so for now we estimate - the lower level of number of groups by the range elements in the - tree. - */ - set_if_bigger(num_groups, MY_MAX(index_tree->elements, 1)); - /* There cannot be more groups than matched records */ - set_if_smaller(num_groups, quick_prefix_records); - } - DBUG_ASSERT(num_groups <= table_records); - - if (used_key_parts > group_key_parts) - { - /* - Compute the probability that two ends of a subgroup are inside - different blocks. + If all ranges are eq_ranges for the group_key_parts we can use + this as the number of groups. */ - keys_per_subgroup= (ha_rows) index_info->actual_rec_per_key(used_key_parts - - 1); - if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */ - p_overlap= 1.0; /* a block, it will overlap at least two blocks. */ + groups= index_tree->number_of_eq_groups(group_key_parts); + if (groups > 0) + num_groups= groups; else { - double blocks_per_group= (double) num_blocks / (double) num_groups; - p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group; - p_overlap= MY_MIN(p_overlap, 1.0); + /* + Expect at least as many groups as there is ranges in the index + + This is mostly relevant for queries with few records, which is + something we have a lot of in our test suites. + In theory it is possible to scan the index_tree and for cases + where all ranges are eq ranges, we could calculate the exact number + of groups. This is probably an overkill so for now we estimate + the lower level of number of groups by the range elements in the + tree. + */ + set_if_bigger(num_groups, MY_MAX(index_tree->elements, 1)); } - io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks); + /* There cannot be more groups than matched records */ + set_if_smaller(num_groups, quick_prefix_records); } - else - io_cost= ((keys_per_group > keys_per_block) ? - (have_min && have_max) ? (double) (num_groups + 1) : - (double) num_groups : - (double) num_blocks); + DBUG_ASSERT(num_groups <= records); + + + /* Calculate the number of blocks we will touch for the table or range scan */ + num_blocks= (records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV * + INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1; + + io_cost= (have_max) ? num_groups*2 : num_groups; + set_if_smaller(io_cost, num_blocks); /* CPU cost must be comparable to that of an index scan as computed @@ -15118,13 +15187,13 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, *read_cost= file->ha_keyread_and_compare_time(keyno, (ulong) num_groups, num_groups, io_cost); - *records= num_groups; + *out_records= num_groups; DBUG_PRINT("info", - ("table rows: %lu keys/block: %u keys/group: %lu " + ("rows: %lu keys/group: %lu " "result rows: %lu blocks: %lu", - (ulong) table_records, keys_per_block, (ulong) keys_per_group, - (ulong) *records, (ulong) num_blocks)); + (ulong) records, (ulong) keys_per_group, + (ulong) *out_records, (ulong) num_blocks)); DBUG_VOID_RETURN; } |