summaryrefslogtreecommitdiff
path: root/sql/multi_range_read.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/multi_range_read.cc')
-rw-r--r--sql/multi_range_read.cc150
1 files changed, 132 insertions, 18 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index d6952e71899..027d2f71d85 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -20,6 +20,18 @@
#include "key.h"
#include "sql_statistics.h"
+static ulonglong key_block_no(TABLE *table, uint keyno, ha_rows keyentry_pos)
+{
+ size_t len= table->key_info[keyno].key_length + table->file->ref_length;
+ if (keyno == table->s->primary_key &&
+ table->file->primary_key_is_clustered())
+ len= table->s->stored_rec_length;
+ uint keys_per_block= (uint) (table->file->stats.block_size/2.0/len+1);
+ ulonglong block_no= !keyentry_pos ? 0 :
+ (keyentry_pos - 1) / keys_per_block + 1;
+ return block_no;
+}
+
/****************************************************************************
* Default MRR implementation (MRR to non-MRR converter)
***************************************************************************/
@@ -47,6 +59,24 @@
for a user to be able to interrupt the calculation by killing the
connection/query.
+ @note
+ Starting from 10.4 the implementation of this method tries to take into
+ account gaps between range intervals. Before this we had such paradoxical
+ cases when, for example, the cost of the index scan by range [1..3] was
+ almost twice as less than the cost of of the index scan by two intervals
+ [1..1] and [3..3].
+
+ @note
+ The current implementation of the method is not efficient for it
+ requires extra dives for gaps. Although these dives are not expensive
+ as they touch the index nodes that with very high probability are in
+ cache this is still not good. We could avoid it if records in range
+ also returned positions of the ends of range intervals. It's not
+ hard to implement it now for MyISAM as this engine provides a function
+ returning an approximation of the relative position of a key tuple
+ among other index key tuples. Unfortunately InnoDB now does not provide
+ anything like this function.
+
@retval
HA_POS_ERROR Error or the engine is unable to perform the requested
scan. Values of OUT parameters are undefined.
@@ -61,12 +91,21 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
uint *bufsz, uint *flags, Cost_estimate *cost)
{
KEY_MULTI_RANGE range;
+ key_range prev_start_key;
range_seq_t seq_it;
- ha_rows rows, total_rows= 0;
+ ha_rows min_pos= 0;
+ ha_rows total_rows= 0;
uint n_ranges=0;
+ uint n_eq_ranges= 0;
+ ulonglong total_touched_blocks= 0;
+ key_range *prev_min_endp= 0;
+ ulonglong prev_max_block_no=0;
+ ha_rows max_rows= stats.records;
THD *thd= table->in_use;
- uint limit= thd->variables.eq_range_index_dive_limit;
+ StringBuffer<64> key_value;
+ uint limit= thd->variables.eq_range_index_dive_limit;
+
bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq,
seq_init_param,
limit);
@@ -77,10 +116,15 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
seq_it= seq->init(seq_init_param, n_ranges, *flags);
while (!seq->next(seq_it, &range))
{
+ ha_rows rows;
+ ulonglong new_touched_blocks= 0;
+
if (unlikely(thd->killed != 0))
return HA_POS_ERROR;
n_ranges++;
+ if (range.range_flag & EQ_RANGE)
+ n_eq_ranges++;
key_range *min_endp, *max_endp;
if (range.range_flag & GEOM_FLAG)
{
@@ -95,38 +139,96 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
max_endp= range.end_key.length? &range.end_key : NULL;
}
int keyparts_used= my_count_bits(range.start_key.keypart_map);
- if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
- rows= 1; /* there can be at most one row */
- else if (use_statistics_for_eq_range &&
- !(range.range_flag & NULL_RANGE) &&
- (range.range_flag & EQ_RANGE) &&
- table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5)
- rows=
- (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used - 1);
+ if (use_statistics_for_eq_range &&
+ !(range.range_flag & NULL_RANGE) &&
+ (range.range_flag & EQ_RANGE) &&
+ table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5)
+ {
+ if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
+ rows= 1; /* there can be at most one row */
+ else
+ rows=
+ (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used-1);
+ }
else
{
- if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
+ ulonglong min_block_no;
+ ulonglong max_block_no;
+ if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
+ rows= 1; /* there can be at most one row */
+ else if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
max_endp)))
{
/* Can't scan one range => can't do MRR scan at all */
total_rows= HA_POS_ERROR;
break;
}
+ if (!max_endp && !(prev_min_endp && prev_min_endp->length))
+ min_pos+= max_rows - rows;
+ else
+ {
+ key_range *start_endp= prev_min_endp;
+ if (start_endp && !start_endp->keypart_map)
+ start_endp= 0;
+ /*
+ Get the estimate of rows in the previous gap
+ and two ranges surrounding this gap
+ */
+ ha_rows r= this->records_in_range(keyno,start_endp,max_endp);
+ if (r == HA_POS_ERROR)
+ {
+ /* Some engine cannot estimate such ranges */
+ total_rows += rows;
+ continue;
+ }
+ min_pos+= r - rows;
+ }
+ min_block_no= key_block_no(this->table, keyno, min_pos);
+ max_block_no= key_block_no(this->table, keyno, min_pos + rows);
+ new_touched_blocks= max_block_no - min_block_no +
+ MY_TEST(min_block_no != prev_max_block_no);
+ prev_max_block_no= max_block_no;
+ if (!prev_min_endp)
+ prev_min_endp= &prev_start_key;
+ /* Save range.start_key for the next iteration step */
+ prev_start_key= range.start_key;
+ key_value.copy((const char *) prev_start_key.key, prev_start_key.length,
+ key_value.charset());
+ prev_start_key.key= (const uchar *) key_value.ptr();
}
total_rows += rows;
+ total_touched_blocks+= new_touched_blocks;
}
if (total_rows != HA_POS_ERROR)
{
+ set_if_smaller(total_rows, max_rows);
/* The following calculation is the same as in multi_range_read_info(): */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
cost->reset();
cost->avg_io_cost= 1; /* assume random seeks */
- if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
- cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows);
+ cost->idx_avg_io_cost= 1;
+ if (!((keyno == table->s->primary_key && primary_key_is_clustered()) ||
+ is_clustering_key(keyno)))
+ {
+ cost->idx_io_count= total_touched_blocks +
+ keyread_time(keyno, 0, total_rows);
+ cost->cpu_cost= cost->idx_cpu_cost=
+ (double) total_rows / TIME_FOR_COMPARE_IDX +
+ (2 * n_ranges - n_eq_ranges) * IDX_LOOKUP_COST;
+ if (!(*flags & HA_MRR_INDEX_ONLY))
+ {
+ cost->io_count= read_time(keyno, 0, total_rows);
+ cost->cpu_cost+= (double) total_rows / TIME_FOR_COMPARE;
+ }
+ }
else
- cost->io_count= read_time(keyno, n_ranges, total_rows);
- cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+ {
+ cost->io_count= read_time(keyno,
+ (uint)total_touched_blocks,
+ (uint) total_rows);
+ cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
+ }
}
return total_rows;
}
@@ -183,10 +285,22 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
cost->avg_io_cost= 1; /* assume random seeks */
/* Produce the same cost as non-MRR code does */
- if (*flags & HA_MRR_INDEX_ONLY)
- cost->io_count= keyread_time(keyno, n_ranges, n_rows);
+ if (!(keyno == table->s->primary_key && primary_key_is_clustered()))
+ {
+ cost->idx_io_count= n_ranges + keyread_time(keyno, 0, n_rows);
+ cost->cpu_cost= cost->idx_cpu_cost=
+ (double) n_rows / TIME_FOR_COMPARE_IDX + n_ranges * IDX_LOOKUP_COST;
+ if (!(*flags & HA_MRR_INDEX_ONLY))
+ {
+ cost->io_count= read_time(keyno, 0, n_rows);
+ cost->cpu_cost+= (double) n_rows / TIME_FOR_COMPARE;
+ }
+ }
else
- cost->io_count= read_time(keyno, n_ranges, n_rows);
+ {
+ cost->io_count= read_time(keyno, n_ranges, (uint)n_rows);
+ cost->cpu_cost= (double) n_rows / TIME_FOR_COMPARE + 0.01;
+ }
return 0;
}