summaryrefslogtreecommitdiff
path: root/sql/opt_range.cc
diff options
context:
space:
mode:
authorMonty <monty@mariadb.org>2022-09-25 18:44:48 +0300
committerSergei Petrunia <sergey@mariadb.com>2023-02-02 23:55:08 +0300
commit7afa819f727144e8a107e28444e07d54045ab38e (patch)
tree80c795559127f609e6fc9806cc234dcc01cbcf4a /sql/opt_range.cc
parent009db2288ba8d22c5e8e5e047d8f8694f7097923 (diff)
downloadmariadb-git-7afa819f727144e8a107e28444e07d54045ab38e.tar.gz
Fix cost calculation for get_best_group_min_max()
If the final range restrictions (SEL_ARG tree) over GROUP BY columns are single-point, we can compute the number of GROUP BY groups. Example: in the query: SELECT ... FROM tbl WHERE keypart1 IN (1,2,3) and keypart2 IN ('foo','bar') the number of groups is 3 * 2 = 6. Other things: - Fixed cost calculation to more correctly count the number of blocks that may be read. The old code could use the total blocks in the file even if a range was available.
Diffstat (limited to 'sql/opt_range.cc')
-rw-r--r--sql/opt_range.cc189
1 files changed, 129 insertions, 60 deletions
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 78e577c31f2..b2e109a5a72 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2174,6 +2174,27 @@ int SEL_ARG::sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
}
+/*
+  Check if the min and max values of this SEL_ARG are equal
+
+  @return 1 if equal (two NULL values are also considered equal)
+*/
+
+bool SEL_ARG::min_max_are_equal() const
+{
+ uint offset= 0;
+ if (field->real_maybe_null()) // If null is part of key
+ {
+ if (*min_value != *max_value)
+ return 0; // NULL markers differ: only one bound is NULL
+ if (*min_value)
+ return 1; // Both bounds are NULL, hence equal
+ offset= 1; // Skip NULL marker
+ }
+ return field->key_cmp(min_value+offset, max_value+offset) == 0;
+}
+
+
SEL_ARG *SEL_ARG::clone_tree(RANGE_OPT_PARAM *param)
{
SEL_ARG tmp_link,*next_arg,*root;
@@ -11040,6 +11061,50 @@ uint SEL_ARG::get_max_key_part() const
}
+/**
+ Compute the number of eq_ranges top elements in the tree
+
+ This is used by cost_group_min_max() to calculate the number of
+ groups in a SEL_TREE
+
+ @param group_key_parts number of key parts that must be equal
+
+ @return < 0 Not known (some range is not a single-point interval)
+ @return >= 0 Number of groups
+*/
+
+int SEL_ARG::number_of_eq_groups(uint group_key_parts) const
+{
+ int elements= 0;
+ SEL_ARG const *cur;
+
+ if (part > group_key_parts-1 || type != KEY_RANGE)
+ return -1;
+
+ cur= first();
+ do
+ {
+ /* Both ends of the interval must be closed, non-geometric bounds */
+ if ((cur->min_flag | cur->max_flag) &
+ (NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | NEAR_MAX | GEOM_FLAG))
+ return -1;
+ /* The interval must be a single point (min == max) */
+ if (cur->min_value != cur->max_value && !cur->min_max_are_equal())
+ return -1;
+ if (cur->part != group_key_parts -1)
+ {
+ int tmp;
+ /* Recurse into the next key part of this element, not the root's */
+ if (!cur->next_key_part)
+ return -1;
+ if ((tmp= cur->next_key_part->number_of_eq_groups(group_key_parts)) < 0)
+ return -1;
+ elements+= tmp;
+ }
+ else
+ elements++;
+ } while ((cur= cur->next));
+ return elements;
+}
+
+
/*
Remove the SEL_ARG graph elements which have part > max_part.
@@ -11092,8 +11157,8 @@ void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part)
@return
tree pointer The tree after processing,
- NULL If it was not possible to reduce the weight of the tree below the
- limit.
+ NULL If it was not possible to reduce the weight of the tree below
+ the limit.
*/
SEL_ARG *enforce_sel_arg_weight_limit(RANGE_OPT_PARAM *param, uint keyno,
@@ -14950,7 +15015,7 @@ get_field_keypart(KEY *index, Field *field)
have_min [in] True if there is a MIN function
have_max [in] True if there is a MAX function
read_cost [out] The cost to retrieve rows via this quick select
- records [out] The number of rows retrieved
+ out_records [out] The number of rows retrieved
DESCRIPTION
This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
@@ -14968,6 +15033,20 @@ get_field_keypart(KEY *index, Field *field)
either scan the index for the next value or do a new index dive
with 'find next bigger key'.
+ When using MIN() and MAX() in the query, the calls to the storage engine
+ are as follows for each group:
+ Assuming kp1 in ('abc','def','ghi') and kp2 between 1000 and 2000
+
+ read_key('abc', HA_READ_KEY_OR_NEXT)
+ In case of MIN() we do:
+ read_key(('abc','1000'), HA_READ_KEY_OR_NEXT)
+ In case of MAX() we do
+ read_key(('abc','2000'), HA_READ_PREFIX_LAST_OR_PREV)
+ In the following code we will assume that the MIN key will be in
+ the same block as the first key read.
+ (We should try to optimize away the extra call for MAX() at some
+ point).
+
NOTES
See get_best_group_min_max() for which kind of queries this function
will be called.
@@ -15013,36 +15092,27 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
uint group_key_parts, SEL_TREE *range_tree,
SEL_ARG *index_tree, ha_rows quick_prefix_records,
bool have_min, bool have_max,
- double *read_cost, ha_rows *records)
+ double *read_cost, ha_rows *out_records)
{
- uint keys_per_block, key_length;
- ha_rows table_records;
+ uint key_length;
+ ha_rows records;
ha_rows num_groups;
ha_rows num_blocks;
ha_rows keys_per_group;
- ha_rows keys_per_subgroup; /* Average number of keys in sub-groups */
- /* formed by a key infix. */
- double p_overlap; /* Probability that a sub-group overlaps two blocks. */
double quick_prefix_selectivity;
- double io_cost;
+ ulonglong io_cost;
handler *file= table->file;
DBUG_ENTER("cost_group_min_max");
/* Same code as in handler::key_read_time() */
- table_records= table->stat_records();
+ records= table->stat_records();
key_length= (index_info->key_length + file->ref_length);
- num_blocks= (table_records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV *
- INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1;
- keys_per_block= (file->stats.block_size /
- (key_length * INDEX_BLOCK_FILL_FACTOR_MUL /
- INDEX_BLOCK_FILL_FACTOR_DIV) +
- 1);
/* Compute the number of keys in a group. */
if (!group_key_parts)
{
/* Summary over the whole table */
- keys_per_group= MY_MAX(table_records,1);
+ keys_per_group= MY_MAX(records,1);
}
else
{
@@ -15050,62 +15120,61 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
1);
if (keys_per_group == 0) /* If there is no statistics try to guess */
{
- /* each group contains 10% of all records */
- keys_per_group= (table_records / 10) + 1;
+ /* each group contains 1% of all records */
+ keys_per_group= (records / 100) + 1;
}
}
if (keys_per_group > 1)
- num_groups= (table_records / keys_per_group) + 1;
+ num_groups= (records / keys_per_group) + 1;
else
- num_groups= table_records;
+ num_groups= records;
/* Apply the selectivity of the quick select for group prefixes. */
if (range_tree && (quick_prefix_records != HA_POS_ERROR))
{
+ int groups;
quick_prefix_selectivity= (double) quick_prefix_records /
- (double) table_records;
+ (double) records;
num_groups= (ha_rows) rint(num_groups * quick_prefix_selectivity);
+ records= quick_prefix_records;
+
/*
- Expect at least as many groups as there is ranges in the index
+ Try to handle cases like
WHERE a in (1,2,3) GROUP BY a
- This is mostly relevant for queries with few records, which is
- something we have a lot of in our test suites.
- In theory it is possible to scan the index_tree and for cases
- where all ranges are eq ranges, we could calculate the exact number
- of groups. This is probably an overkill so for now we estimate
- the lower level of number of groups by the range elements in the
- tree.
- */
- set_if_bigger(num_groups, MY_MAX(index_tree->elements, 1));
- /* There cannot be more groups than matched records */
- set_if_smaller(num_groups, quick_prefix_records);
- }
- DBUG_ASSERT(num_groups <= table_records);
-
- if (used_key_parts > group_key_parts)
- {
- /*
- Compute the probability that two ends of a subgroup are inside
- different blocks.
+ If all ranges are eq_ranges for the group_key_parts we can use
+ this as the number of groups.
*/
- keys_per_subgroup= (ha_rows) index_info->actual_rec_per_key(used_key_parts -
- 1);
- if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
- p_overlap= 1.0; /* a block, it will overlap at least two blocks. */
+ groups= index_tree->number_of_eq_groups(group_key_parts);
+ if (groups > 0)
+ num_groups= groups;
else
{
- double blocks_per_group= (double) num_blocks / (double) num_groups;
- p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
- p_overlap= MY_MIN(p_overlap, 1.0);
+ /*
+ Expect at least as many groups as there is ranges in the index
+
+ This is mostly relevant for queries with few records, which is
+ something we have a lot of in our test suites.
+ In theory it is possible to scan the index_tree and for cases
+ where all ranges are eq ranges, we could calculate the exact number
+ of groups. This is probably an overkill so for now we estimate
+ the lower level of number of groups by the range elements in the
+ tree.
+ */
+ set_if_bigger(num_groups, MY_MAX(index_tree->elements, 1));
}
- io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks);
+ /* There cannot be more groups than matched records */
+ set_if_smaller(num_groups, quick_prefix_records);
}
- else
- io_cost= ((keys_per_group > keys_per_block) ?
- (have_min && have_max) ? (double) (num_groups + 1) :
- (double) num_groups :
- (double) num_blocks);
+ DBUG_ASSERT(num_groups <= records);
+
+
+ /* Calculate the number of blocks we will touch for the table or range scan */
+ num_blocks= (records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV *
+ INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1;
+
+ io_cost= (have_max) ? num_groups*2 : num_groups;
+ set_if_smaller(io_cost, num_blocks);
/*
CPU cost must be comparable to that of an index scan as computed
@@ -15118,13 +15187,13 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
*read_cost= file->ha_keyread_and_compare_time(keyno, (ulong) num_groups,
num_groups,
io_cost);
- *records= num_groups;
+ *out_records= num_groups;
DBUG_PRINT("info",
- ("table rows: %lu keys/block: %u keys/group: %lu "
+ ("rows: %lu keys/group: %lu "
"result rows: %lu blocks: %lu",
- (ulong) table_records, keys_per_block, (ulong) keys_per_group,
- (ulong) *records, (ulong) num_blocks));
+ (ulong) records, (ulong) keys_per_group,
+ (ulong) *out_records, (ulong) num_blocks));
DBUG_VOID_RETURN;
}