diff options
Diffstat (limited to 'sql/multi_range_read.cc')
-rw-r--r-- | sql/multi_range_read.cc | 150 |
1 files changed, 132 insertions, 18 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index d6952e71899..027d2f71d85 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -20,6 +20,18 @@ #include "key.h" #include "sql_statistics.h" +static ulonglong key_block_no(TABLE *table, uint keyno, ha_rows keyentry_pos) +{ + size_t len= table->key_info[keyno].key_length + table->file->ref_length; + if (keyno == table->s->primary_key && + table->file->primary_key_is_clustered()) + len= table->s->stored_rec_length; + uint keys_per_block= (uint) (table->file->stats.block_size/2.0/len+1); + ulonglong block_no= !keyentry_pos ? 0 : + (keyentry_pos - 1) / keys_per_block + 1; + return block_no; +} + /**************************************************************************** * Default MRR implementation (MRR to non-MRR converter) ***************************************************************************/ @@ -47,6 +59,24 @@ for a user to be able to interrupt the calculation by killing the connection/query. + @note + Starting from 10.4 the implementation of this method tries to take into + account gaps between range intervals. Before this we had such paradoxical + cases when, for example, the cost of the index scan by range [1..3] was + almost twice as less than the cost of of the index scan by two intervals + [1..1] and [3..3]. + + @note + The current implementation of the method is not efficient for it + requires extra dives for gaps. Although these dives are not expensive + as they touch the index nodes that with very high probability are in + cache this is still not good. We could avoid it if records in range + also returned positions of the ends of range intervals. It's not + hard to implement it now for MyISAM as this engine provides a function + returning an approximation of the relative position of a key tuple + among other index key tuples. Unfortunately InnoDB now does not provide + anything like this function. + @retval HA_POS_ERROR Error or the engine is unable to perform the requested scan. Values of OUT parameters are undefined. @@ -61,12 +91,21 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, uint *bufsz, uint *flags, Cost_estimate *cost) { KEY_MULTI_RANGE range; + key_range prev_start_key; range_seq_t seq_it; - ha_rows rows, total_rows= 0; + ha_rows min_pos= 0; + ha_rows total_rows= 0; uint n_ranges=0; + uint n_eq_ranges= 0; + ulonglong total_touched_blocks= 0; + key_range *prev_min_endp= 0; + ulonglong prev_max_block_no=0; + ha_rows max_rows= stats.records; THD *thd= table->in_use; - uint limit= thd->variables.eq_range_index_dive_limit; + StringBuffer<64> key_value; + uint limit= thd->variables.eq_range_index_dive_limit; + bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq, seq_init_param, limit); @@ -77,10 +116,15 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, seq_it= seq->init(seq_init_param, n_ranges, *flags); while (!seq->next(seq_it, &range)) { + ha_rows rows; + ulonglong new_touched_blocks= 0; + if (unlikely(thd->killed != 0)) return HA_POS_ERROR; n_ranges++; + if (range.range_flag & EQ_RANGE) + n_eq_ranges++; key_range *min_endp, *max_endp; if (range.range_flag & GEOM_FLAG) { @@ -95,38 +139,96 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, max_endp= range.end_key.length? &range.end_key : NULL; } int keyparts_used= my_count_bits(range.start_key.keypart_map); - if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) - rows= 1; /* there can be at most one row */ - else if (use_statistics_for_eq_range && - !(range.range_flag & NULL_RANGE) && - (range.range_flag & EQ_RANGE) && - table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5) - rows= - (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used - 1); + if (use_statistics_for_eq_range && + !(range.range_flag & NULL_RANGE) && + (range.range_flag & EQ_RANGE) && + table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5) + { + if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) + rows= 1; /* there can be at most one row */ + else + rows= + (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used-1); + } else { - if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, + ulonglong min_block_no; + ulonglong max_block_no; + if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) + rows= 1; /* there can be at most one row */ + else if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, max_endp))) { /* Can't scan one range => can't do MRR scan at all */ total_rows= HA_POS_ERROR; break; } + if (!max_endp && !(prev_min_endp && prev_min_endp->length)) + min_pos+= max_rows - rows; + else + { + key_range *start_endp= prev_min_endp; + if (start_endp && !start_endp->keypart_map) + start_endp= 0; + /* + Get the estimate of rows in the previous gap + and two ranges surrounding this gap + */ + ha_rows r= this->records_in_range(keyno,start_endp,max_endp); + if (r == HA_POS_ERROR) + { + /* Some engine cannot estimate such ranges */ + total_rows += rows; + continue; + } + min_pos+= r - rows; + } + min_block_no= key_block_no(this->table, keyno, min_pos); + max_block_no= key_block_no(this->table, keyno, min_pos + rows); + new_touched_blocks= max_block_no - min_block_no + + MY_TEST(min_block_no != prev_max_block_no); + prev_max_block_no= max_block_no; + if (!prev_min_endp) + prev_min_endp= &prev_start_key; + /* Save range.start_key for the next iteration step */ + prev_start_key= range.start_key; + key_value.copy((const char *) prev_start_key.key, prev_start_key.length, + key_value.charset()); + prev_start_key.key= (const uchar *) key_value.ptr(); } total_rows += rows; + total_touched_blocks+= new_touched_blocks; } if (total_rows != HA_POS_ERROR) { + set_if_smaller(total_rows, max_rows); /* The following calculation is the same as in multi_range_read_info(): */ *flags |= HA_MRR_USE_DEFAULT_IMPL; cost->reset(); cost->avg_io_cost= 1; /* assume random seeks */ - if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) - cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows); + cost->idx_avg_io_cost= 1; + if (!((keyno == table->s->primary_key && primary_key_is_clustered()) || + is_clustering_key(keyno))) + { + cost->idx_io_count= total_touched_blocks + + keyread_time(keyno, 0, total_rows); + cost->cpu_cost= cost->idx_cpu_cost= + (double) total_rows / TIME_FOR_COMPARE_IDX + + (2 * n_ranges - n_eq_ranges) * IDX_LOOKUP_COST; + if (!(*flags & HA_MRR_INDEX_ONLY)) + { + cost->io_count= read_time(keyno, 0, total_rows); + cost->cpu_cost+= (double) total_rows / TIME_FOR_COMPARE; + } + } else - cost->io_count= read_time(keyno, n_ranges, total_rows); - cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + { + cost->io_count= read_time(keyno, + (uint)total_touched_blocks, + (uint) total_rows); + cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + } } return total_rows; } @@ -183,10 +285,22 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, cost->avg_io_cost= 1; /* assume random seeks */ /* Produce the same cost as non-MRR code does */ - if (*flags & HA_MRR_INDEX_ONLY) - cost->io_count= keyread_time(keyno, n_ranges, n_rows); + if (!(keyno == table->s->primary_key && primary_key_is_clustered())) + { + cost->idx_io_count= n_ranges + keyread_time(keyno, 0, n_rows); + cost->cpu_cost= cost->idx_cpu_cost= + (double) n_rows / TIME_FOR_COMPARE_IDX + n_ranges * IDX_LOOKUP_COST; + if (!(*flags & HA_MRR_INDEX_ONLY)) + { + cost->io_count= read_time(keyno, 0, n_rows); + cost->cpu_cost+= (double) n_rows / TIME_FOR_COMPARE; + } + } else - cost->io_count= read_time(keyno, n_ranges, n_rows); + { + cost->io_count= read_time(keyno, n_ranges, (uint)n_rows); + cost->cpu_cost= (double) n_rows / TIME_FOR_COMPARE + 0.01; + } return 0; } |