diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2009-12-22 15:33:21 +0300 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2009-12-22 15:33:21 +0300 |
commit | da5edf5057d392f3647570606220d80106c81a7b (patch) | |
tree | 922a9e1f2882014f8c5a5a5f524bf88e39ed41dd /sql/multi_range_read.cc | |
parent | 19f6f52a21d48914a2542bcaf23806ace6870e8e (diff) | |
download | mariadb-git-da5edf5057d392f3647570606220d80106c81a7b.tar.gz |
MWL#67: MRR backport
- Make index condition pushdown be controlled by an @@optimizer_switch flag,
not by @@engine_condition_pushdown
- Make MRR buffer size be controlled by @@mrr_buffer_size, not
by @@read_rnd_buffer_size
- Move parts of code to separate files
- Code cleanup
- Add --sorted_result to some SELECTs in tests.
Diffstat (limited to 'sql/multi_range_read.cc')
-rw-r--r-- | sql/multi_range_read.cc | 944 |
1 files changed, 944 insertions, 0 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc new file mode 100644 index 00000000000..644634c3d74 --- /dev/null +++ b/sql/multi_range_read.cc @@ -0,0 +1,944 @@ +#include "mysql_priv.h" +#include "sql_select.h" + +/**************************************************************************** + * Default MRR implementation (MRR to non-MRR converter) + ***************************************************************************/ + +/** + Get cost and other information about MRR scan over a known list of ranges + + Calculate estimated cost and other information about an MRR scan for given + sequence of ranges. + + @param keyno Index number + @param seq Range sequence to be traversed + @param seq_init_param First parameter for seq->init() + @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller + can't efficiently determine it + @param bufsz INOUT IN: Size of the buffer available for use + OUT: Size of the buffer that is expected to be actually + used, or 0 if buffer is not needed. + @param flags INOUT A combination of HA_MRR_* flags + @param cost OUT Estimated cost of MRR access + + @note + This method (or an overriding one in a derived class) must check for + thd->killed and return HA_POS_ERROR if it is not zero. This is required + for a user to be able to interrupt the calculation by killing the + connection/query. + + @retval + HA_POS_ERROR Error or the engine is unable to perform the requested + scan. Values of OUT parameters are undefined. + @retval + other OK, *cost contains cost of the scan, *bufsz and *flags + contain scan parameters. +*/ + +ha_rows +handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, uint n_ranges_arg, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + KEY_MULTI_RANGE range; + range_seq_t seq_it; + ha_rows rows, total_rows= 0; + uint n_ranges=0; + THD *thd= current_thd; + + /* Default MRR implementation doesn't need buffer */ + *bufsz= 0; + + seq_it= seq->init(seq_init_param, n_ranges, *flags); + while (!seq->next(seq_it, &range)) + { + if (unlikely(thd->killed != 0)) + return HA_POS_ERROR; + + n_ranges++; + key_range *min_endp, *max_endp; + if (range.range_flag & GEOM_FLAG) + { + /* In this case tmp_min_flag contains the handler-read-function */ + range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG); + min_endp= &range.start_key; + max_endp= NULL; + } + else + { + min_endp= range.start_key.length? &range.start_key : NULL; + max_endp= range.end_key.length? &range.end_key : NULL; + } + if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) + rows= 1; /* there can be at most one row */ + else + { + if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, + max_endp))) + { + /* Can't scan one range => can't do MRR scan at all */ + total_rows= HA_POS_ERROR; + break; + } + } + total_rows += rows; + } + + if (total_rows != HA_POS_ERROR) + { + /* The following calculation is the same as in multi_range_read_info(): */ + *flags |= HA_MRR_USE_DEFAULT_IMPL; + cost->zero(); + cost->avg_io_cost= 1; /* assume random seeks */ + if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) + cost->io_count= index_only_read_time(keyno, (uint)total_rows); + else + cost->io_count= read_time(keyno, n_ranges, total_rows); + cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + } + return total_rows; +} + + +/** + Get cost and other information about MRR scan over some sequence of ranges + + Calculate estimated cost and other information about an MRR scan for some + sequence of ranges. + + The ranges themselves will be known only at execution phase. When this + function is called we only know number of ranges and a (rough) E(#records) + within those ranges. + + Currently this function is only called for "n-keypart singlepoint" ranges, + i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN" + + The flags parameter is a combination of those flags: HA_MRR_SORTED, + HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS. + + @param keyno Index number + @param n_ranges Estimated number of ranges (i.e. intervals) in the + range sequence. + @param n_rows Estimated total number of records contained within all + of the ranges + @param bufsz INOUT IN: Size of the buffer available for use + OUT: Size of the buffer that will be actually used, or + 0 if buffer is not needed. + @param flags INOUT A combination of HA_MRR_* flags + @param cost OUT Estimated cost of MRR access + + @retval + 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan + parameters. + @retval + other Error or can't perform the requested scan +*/ + +ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + *bufsz= 0; /* Default implementation doesn't need a buffer */ + + *flags |= HA_MRR_USE_DEFAULT_IMPL; + + cost->zero(); + cost->avg_io_cost= 1; /* assume random seeks */ + + /* Produce the same cost as non-MRR code does */ + if (*flags & HA_MRR_INDEX_ONLY) + cost->io_count= index_only_read_time(keyno, n_rows); + else + cost->io_count= read_time(keyno, n_ranges, n_rows); + return 0; +} + + +/** + Initialize the MRR scan + + Initialize the MRR scan. This function may do heavyweight scan + initialization like row prefetching/sorting/etc (NOTE: but better not do + it here as we may not need it, e.g. if we never satisfy WHERE clause on + previous tables. For many implementations it would be natural to do such + initializations in the first multi_read_range_next() call) + + mode is a combination of the following flags: HA_MRR_SORTED, + HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION + + @param seq Range sequence to be traversed + @param seq_init_param First parameter for seq->init() + @param n_ranges Number of ranges in the sequence + @param mode Flags, see the description section for the details + @param buf INOUT: memory buffer to be used + + @note + One must have called index_init() before calling this function. Several + multi_range_read_init() calls may be made in course of one query. + + Until WL#2623 is done (see its text, section 3.2), the following will + also hold: + The caller will guarantee that if "seq->init == mrr_ranges_array_init" + then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures. + This property will only be used by NDB handler until WL#2623 is done. + + Buffer memory management is done according to the following scenario: + The caller allocates the buffer and provides it to the callee by filling + the members of HANDLER_BUFFER structure. + The callee consumes all or some fraction of the provided buffer space, and + sets the HANDLER_BUFFER members accordingly. + The callee may use the buffer memory until the next multi_range_read_init() + call is made, all records have been read, or until index_end() call is + made, whichever comes first. + + @retval 0 OK + @retval 1 Error +*/ + +int +handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, + uint n_ranges, uint mode, HANDLER_BUFFER *buf) +{ + DBUG_ENTER("handler::multi_range_read_init"); + mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); + mrr_funcs= *seq_funcs; + mrr_is_output_sorted= test(mode & HA_MRR_SORTED); + mrr_have_range= FALSE; + DBUG_RETURN(0); +} + + +/** + Get next record in MRR scan + + Default MRR implementation: read the next record + + @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect + Otherwise, the opaque value associated with the range + that contains the returned record. + + @retval 0 OK + @retval other Error code +*/ + +int handler::multi_range_read_next(char **range_info) +{ + int UNINIT_VAR(result); + int range_res; + DBUG_ENTER("handler::multi_range_read_next"); + + if (!mrr_have_range) + { + mrr_have_range= TRUE; + goto start; + } + + do + { + /* Save a call if there can be only one row in range. */ + if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE)) + { + result= read_range_next(); + /* On success or non-EOF errors jump to the end. */ + if (result != HA_ERR_END_OF_FILE) + break; + } + else + { + if (was_semi_consistent_read()) + goto scan_it_again; + /* + We need to set this for the last range only, but checking this + condition is more expensive than just setting the result code. + */ + result= HA_ERR_END_OF_FILE; + } + +start: + /* Try the next range(s) until one matches a record. */ + while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range))) + { +scan_it_again: + result= read_range_first(mrr_cur_range.start_key.keypart_map ? + &mrr_cur_range.start_key : 0, + mrr_cur_range.end_key.keypart_map ? + &mrr_cur_range.end_key : 0, + test(mrr_cur_range.range_flag & EQ_RANGE), + mrr_is_output_sorted); + if (result != HA_ERR_END_OF_FILE) + break; + } + } + while ((result == HA_ERR_END_OF_FILE) && !range_res); + + *range_info= mrr_cur_range.ptr; + DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result)); + DBUG_RETURN(result); +} + +/**************************************************************************** + * DS-MRR implementation + ***************************************************************************/ + +/** + DS-MRR: Initialize and start MRR scan + + Initialize and start the MRR scan. Depending on the mode parameter, this + may use default or DS-MRR implementation. + + @param h Table handler to be used + @param key Index to be used + @param seq_funcs Interval sequence enumeration functions + @param seq_init_param Interval sequence enumeration parameter + @param n_ranges Number of ranges in the sequence. + @param mode HA_MRR_* modes to use + @param buf INOUT Buffer to use + + @retval 0 Ok, Scan started. + @retval other Error +*/ + +int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, uint mode, + HANDLER_BUFFER *buf) +{ + uint elem_size; + Item *pushed_cond= NULL; + handler *new_h2= 0; + DBUG_ENTER("DsMrr_impl::dsmrr_init"); + + /* + index_merge may invoke a scan on an object for which dsmrr_info[_const] + has not been called, so set the owner handler here as well. + */ + h= h_arg; + if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED) + { + use_default_impl= TRUE; + const int retval= + h->handler::multi_range_read_init(seq_funcs, seq_init_param, + n_ranges, mode, buf); + DBUG_RETURN(retval); + } + rowids_buf= buf->buffer; + + is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); + + if (is_mrr_assoc) + status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); + + rowids_buf_end= buf->buffer_end; + elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + rowids_buf_last= rowids_buf + + ((rowids_buf_end - rowids_buf)/ elem_size)* + elem_size; + rowids_buf_end= rowids_buf_last; + + /* + There can be two cases: + - This is the first call since index_init(), h2==NULL + Need to setup h2 then. + - This is not the first call, h2 is initalized and set up appropriately. + The caller might have called h->index_init(), need to switch h to + rnd_pos calls. + */ + if (!h2) + { + /* Create a separate handler object to do rndpos() calls. */ + THD *thd= current_thd; + /* + ::clone() takes up a lot of stack, especially on 64 bit platforms. + The constant 5 is an empiric result. + */ + if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) + DBUG_RETURN(1); + DBUG_ASSERT(h->active_index != MAX_KEY); + uint mrr_keyno= h->active_index; + + /* Create a separate handler object to do rndpos() calls. */ + if (!(new_h2= h->clone(thd->mem_root)) || + new_h2->ha_external_lock(thd, F_RDLCK)) + { + delete new_h2; + DBUG_RETURN(1); + } + + if (mrr_keyno == h->pushed_idx_cond_keyno) + pushed_cond= h->pushed_idx_cond; + + /* + Caution: this call will invoke this->dsmrr_close(). Do not put the + created secondary table handler into this->h2 or it will delete it. + */ + if (h->ha_index_end()) + { + h2=new_h2; + goto error; + } + + h2= new_h2; /* Ok, now can put it into h2 */ + table->prepare_for_position(); + h2->extra(HA_EXTRA_KEYREAD); + + if (h2->ha_index_init(mrr_keyno, FALSE)) + goto error; + + use_default_impl= FALSE; + if (pushed_cond) + h2->idx_cond_push(mrr_keyno, pushed_cond); + } + else + { + /* + We get here when the access alternates betwen MRR scan(s) and non-MRR + scans. + + Calling h->index_end() will invoke dsmrr_close() for this object, + which will delete h2. We need to keep it, so save put it away and dont + let it be deleted: + */ + handler *save_h2= h2; + h2= NULL; + int res= (h->inited == handler::INDEX && h->ha_index_end()); + h2= save_h2; + use_default_impl= FALSE; + if (res) + goto error; + } + + if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, + mode, buf) || + dsmrr_fill_buffer()) + { + goto error; + } + /* + If the above call has scanned through all intervals in *seq, then + adjust *buf to indicate that the remaining buffer space will not be used. + */ + if (dsmrr_eof) + buf->end_of_used_area= rowids_buf_last; + + /* + h->inited == INDEX may occur when 'range checked for each record' is + used. + */ + if ((h->inited != handler::RND) && + ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) || + (h->ha_rnd_init(FALSE)))) + goto error; + + use_default_impl= FALSE; + h->mrr_funcs= *seq_funcs; + + DBUG_RETURN(0); +error: + h2->ha_index_or_rnd_end(); + h2->ha_external_lock(current_thd, F_UNLCK); + h2->close(); + delete h2; + h2= NULL; + DBUG_RETURN(1); +} + + +void DsMrr_impl::dsmrr_close() +{ + DBUG_ENTER("DsMrr_impl::dsmrr_close"); + if (h2) + { + h2->ha_index_or_rnd_end(); + h2->ha_external_lock(current_thd, F_UNLCK); + h2->close(); + delete h2; + h2= NULL; + } + use_default_impl= TRUE; + DBUG_VOID_RETURN; +} + + +static int rowid_cmp(void *h, uchar *a, uchar *b) +{ + return ((handler*)h)->cmp_ref(a, b); +} + + +/** + DS-MRR: Fill the buffer with rowids and sort it by rowid + + {This is an internal function of DiskSweep MRR implementation} + Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into + buffer. When the buffer is full or scan is completed, sort the buffer by + rowid and return. + + The function assumes that rowids buffer is empty when it is invoked. + + @param h Table handler + + @retval 0 OK, the next portion of rowids is in the buffer, + properly ordered + @retval other Error +*/ + +int DsMrr_impl::dsmrr_fill_buffer() +{ + char *range_info; + int res; + DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer"); + + rowids_buf_cur= rowids_buf; + while ((rowids_buf_cur < rowids_buf_end) && + !(res= h2->handler::multi_range_read_next(&range_info))) + { + KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range; + if (h2->mrr_funcs.skip_index_tuple && + h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr)) + continue; + + /* Put rowid, or {rowid, range_id} pair into the buffer */ + h2->position(table->record[0]); + memcpy(rowids_buf_cur, h2->ref, h2->ref_length); + rowids_buf_cur += h2->ref_length; + + if (is_mrr_assoc) + { + memcpy(rowids_buf_cur, &range_info, sizeof(void*)); + rowids_buf_cur += sizeof(void*); + } + } + + if (res && res != HA_ERR_END_OF_FILE) + DBUG_RETURN(res); + dsmrr_eof= test(res == HA_ERR_END_OF_FILE); + + /* Sort the buffer contents by rowid */ + uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; + + my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp, + (void*)h); + rowids_buf_last= rowids_buf_cur; + rowids_buf_cur= rowids_buf; + DBUG_RETURN(0); +} + + +/** + DS-MRR implementation: multi_range_read_next() function +*/ + +int DsMrr_impl::dsmrr_next(char **range_info) +{ + int res; + uchar *cur_range_info= 0; + uchar *rowid; + + if (use_default_impl) + return h->handler::multi_range_read_next(range_info); + + do + { + if (rowids_buf_cur == rowids_buf_last) + { + if (dsmrr_eof) + { + res= HA_ERR_END_OF_FILE; + goto end; + } + res= dsmrr_fill_buffer(); + if (res) + goto end; + } + + /* return eof if there are no rowids in the buffer after re-fill attempt */ + if (rowids_buf_cur == rowids_buf_last) + { + res= HA_ERR_END_OF_FILE; + goto end; + } + rowid= rowids_buf_cur; + + if (is_mrr_assoc) + memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**)); + + rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc); + if (h2->mrr_funcs.skip_record && + h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) + continue; + res= h->rnd_pos(table->record[0], rowid); + break; + } while (true); + + if (is_mrr_assoc) + { + memcpy(range_info, rowid + h->ref_length, sizeof(void*)); + } +end: + return res; +} + + +/** + DS-MRR implementation: multi_range_read_info() function +*/ +ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + ha_rows res; + uint def_flags= *flags; + uint def_bufsz= *bufsz; + + /* Get cost/flags/mem_usage of default MRR implementation */ + res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz, + &def_flags, cost); + DBUG_ASSERT(!res); + + if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || + choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost)) + { + /* Default implementation is choosen */ + DBUG_PRINT("info", ("Default MRR implementation choosen")); + *flags= def_flags; + *bufsz= def_bufsz; + } + else + { + /* *flags and *bufsz were set by choose_mrr_impl */ + DBUG_PRINT("info", ("DS-MRR implementation choosen")); + } + return 0; +} + + +/** + DS-MRR Implementation: multi_range_read_info_const() function +*/ + +ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, uint n_ranges, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + ha_rows rows; + uint def_flags= *flags; + uint def_bufsz= *bufsz; + /* Get cost/flags/mem_usage of default MRR implementation */ + rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param, + n_ranges, &def_bufsz, + &def_flags, cost); + if (rows == HA_POS_ERROR) + { + /* Default implementation can't perform MRR scan => we can't either */ + return rows; + } + + /* + If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to + use the default MRR implementation (we need it for UPDATE/DELETE). + Otherwise, make a choice based on cost and @@optimizer_use_mrr. + */ + if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || + choose_mrr_impl(keyno, rows, flags, bufsz, cost)) + { + DBUG_PRINT("info", ("Default MRR implementation choosen")); + *flags= def_flags; + *bufsz= def_bufsz; + } + else + { + /* *flags and *bufsz were set by choose_mrr_impl */ + DBUG_PRINT("info", ("DS-MRR implementation choosen")); + } + return rows; +} + + +/** + Check if key has partially-covered columns + + We can't use DS-MRR to perform range scans when the ranges are over + partially-covered keys, because we'll not have full key part values + (we'll have their prefixes from the index) and will not be able to check + if we've reached the end the range. + + @param keyno Key to check + + @todo + Allow use of DS-MRR in cases where the index has partially-covered + components but they are not used for scanning. + + @retval TRUE Yes + @retval FALSE No +*/ + +bool key_uses_partial_cols(TABLE *table, uint keyno) +{ + KEY_PART_INFO *kp= table->key_info[keyno].key_part; + KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts; + for (; kp != kp_end; kp++) + { + if (!kp->field->part_of_key.is_set(keyno)) + return TRUE; + } + return FALSE; +} + +/** + DS-MRR Internals: Choose between Default MRR implementation and DS-MRR + + Make the choice between using Default MRR implementation and DS-MRR. + This function contains common functionality factored out of dsmrr_info() + and dsmrr_info_const(). The function assumes that the default MRR + implementation's applicability requirements are satisfied. + + @param keyno Index number + @param rows E(full rows to be retrieved) + @param flags IN MRR flags provided by the MRR user + OUT If DS-MRR is choosen, flags of DS-MRR implementation + else the value is not modified + @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation + else the value is not modified + @param cost IN Cost of default MRR implementation + OUT If DS-MRR is choosen, cost of DS-MRR scan + else the value is not modified + + @retval TRUE Default MRR implementation should be used + @retval FALSE DS-MRR implementation should be used +*/ + +bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, + uint *bufsz, COST_VECT *cost) +{ + COST_VECT dsmrr_cost; + bool res; + THD *thd= current_thd; + if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY || + (keyno == table->s->primary_key && h->primary_key_is_clustered()) || + key_uses_partial_cols(table, keyno)) + { + /* Use the default implementation */ + *flags |= HA_MRR_USE_DEFAULT_IMPL; + return TRUE; + } + + uint add_len= table->key_info[keyno].key_length + h->ref_length; + *bufsz -= add_len; + if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost)) + return TRUE; + *bufsz += add_len; + + bool force_dsmrr; + /* + If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of + DS-MRR and Default implementations cost. This allows one to force use of + DS-MRR whenever it is applicable without affecting other cost-based + choices. + */ + if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) && + dsmrr_cost.total_cost() > cost->total_cost()) + dsmrr_cost= *cost; + + if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost()) + { + *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */ + *flags &= ~HA_MRR_SORTED; /* We will return unordered output */ + *cost= dsmrr_cost; + res= FALSE; + } + else + { + /* Use the default MRR implementation */ + res= TRUE; + } + return res; +} + + +static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost); + + +/** + Get cost of DS-MRR scan + + @param keynr Index to be used + @param rows E(Number of rows to be scanned) + @param flags Scan parameters (HA_MRR_* flags) + @param buffer_size INOUT Buffer size + @param cost OUT The cost + + @retval FALSE OK + @retval TRUE Error, DS-MRR cannot be used (the buffer is too small + for even 1 rowid) +*/ + +bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, + uint *buffer_size, COST_VECT *cost) +{ + ulong max_buff_entries, elem_size; + ha_rows rows_in_full_step, rows_in_last_step; + uint n_full_steps; + double index_read_cost; + + elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION)); + max_buff_entries = *buffer_size / elem_size; + + if (!max_buff_entries) + return TRUE; /* Buffer has not enough space for even 1 rowid */ + + /* Number of iterations we'll make with full buffer */ + n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries); + + /* + Get numbers of rows we'll be processing in + - non-last sweep, with full buffer + - last iteration, with non-full buffer + */ + rows_in_full_step= max_buff_entries; + rows_in_last_step= rows % max_buff_entries; + + /* Adjust buffer size if we expect to use only part of the buffer */ + if (n_full_steps) + { + get_sort_and_sweep_cost(table, rows, cost); + cost->multiply(n_full_steps); + } + else + { + cost->zero(); + *buffer_size= max(*buffer_size, + (size_t)(1.2*rows_in_last_step) * elem_size + + h->ref_length + table->key_info[keynr].key_length); + } + + COST_VECT last_step_cost; + get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost); + cost->add(&last_step_cost); + + if (n_full_steps != 0) + cost->mem_cost= *buffer_size; + else + cost->mem_cost= (double)rows_in_last_step * elem_size; + + /* Total cost of all index accesses */ + index_read_cost= h->index_only_read_time(keynr, (double)rows); + cost->add_io(index_read_cost, 1 /* Random seeks */); + return FALSE; +} + + +/* + Get cost of one sort-and-sweep step + + SYNOPSIS + get_sort_and_sweep_cost() + table Table being accessed + nrows Number of rows to be sorted and retrieved + cost OUT The cost + + DESCRIPTION + Get cost of these operations: + - sort an array of #nrows ROWIDs using qsort + - read #nrows records from table in a sweep. +*/ + +static +void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost) +{ + if (nrows) + { + get_sweep_read_cost(table, nrows, FALSE, cost); + /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */ + double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID); + if (cmp_op < 3) + cmp_op= 3; + cost->cpu_cost += cmp_op * log2(cmp_op); + } + else + cost->zero(); +} + + +/** + Get cost of reading nrows table records in a "disk sweep" + + A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made + for an ordered sequence of rowids. + + We assume hard disk IO. The read is performed as follows: + + 1. The disk head is moved to the needed cylinder + 2. The controller waits for the plate to rotate + 3. The data is transferred + + Time to do #3 is insignificant compared to #2+#1. + + Time to move the disk head is proportional to head travel distance. + + Time to wait for the plate to rotate depends on whether the disk head + was moved or not. + + If disk head wasn't moved, the wait time is proportional to distance + between the previous block and the block we're reading. + + If the head was moved, we don't know how much we'll need to wait for the + plate to rotate. We assume the wait time to be a variate with a mean of + 0.5 of full rotation time. + + Our cost units are "random disk seeks". The cost of random disk seek is + actually not a constant, it depends one range of cylinders we're going + to access. We make it constant by introducing a fuzzy concept of "typical + datafile length" (it's fuzzy as it's hard to tell whether it should + include index file, temp.tables etc). Then random seek cost is: + + 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length + + We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9. + + @param table Table to be accessed + @param nrows Number of rows to retrieve + @param interrupted TRUE <=> Assume that the disk sweep will be + interrupted by other disk IO. FALSE - otherwise. + @param cost OUT The cost. +*/ + +void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, + COST_VECT *cost) +{ + DBUG_ENTER("get_sweep_read_cost"); + + cost->zero(); + if (table->file->primary_key_is_clustered()) + { + cost->io_count= table->file->read_time(table->s->primary_key, + (uint) nrows, nrows); + } + else + { + double n_blocks= + ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE); + double busy_blocks= + n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows))); + if (busy_blocks < 1.0) + busy_blocks= 1.0; + + DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, + busy_blocks)); + cost->io_count= busy_blocks; + + if (!interrupted) + { + /* Assume reading is done in one 'sweep' */ + cost->avg_io_cost= (DISK_SEEK_BASE_COST + + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); + } + } + DBUG_PRINT("info",("returning cost=%g", cost->total_cost())); + DBUG_VOID_RETURN; +} + + +/* ************************************************************************** + * DS-MRR implementation ends + ***************************************************************************/ + + |