diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2010-12-13 13:42:40 +0300 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2010-12-13 13:42:40 +0300 |
commit | 58b646001a1d9b00ca8e41bd2a6826ed40f81b5f (patch) | |
tree | 57c9062984c94b611fc88a13b119ce2fc57149d0 /sql/multi_range_read.cc | |
parent | 419d524ff9013534d522eca8fedee3ead2c403d5 (diff) | |
parent | 14ca046833e3cbcd409b6a83c193e5fc16b5e94b (diff) | |
download | mariadb-git-58b646001a1d9b00ca8e41bd2a6826ed40f81b5f.tar.gz |
Merge DS-MRR/CPK improvements into 5.3-main
Diffstat (limited to 'sql/multi_range_read.cc')
-rw-r--r-- | sql/multi_range_read.cc | 1185 |
1 files changed, 977 insertions, 208 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 70fcbd605e6..321298fafa1 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -1,4 +1,5 @@ #include "mysql_priv.h" +#include <my_bit.h> #include "sql_select.h" /**************************************************************************** @@ -136,10 +137,16 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, */ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, - uint *bufsz, uint *flags, COST_VECT *cost) + uint key_parts, uint *bufsz, + uint *flags, COST_VECT *cost) { - *bufsz= 0; /* Default implementation doesn't need a buffer */ + /* + Currently we expect this function to be called only in preparation of scan + with HA_MRR_SINGLE_POINT property. + */ + DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT); + *bufsz= 0; /* Default implementation doesn't need a buffer */ *flags |= HA_MRR_USE_DEFAULT_IMPL; cost->zero(); @@ -207,7 +214,6 @@ handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, DBUG_RETURN(0); } - /** Get next record in MRR scan @@ -277,7 +283,455 @@ scan_it_again: } /**************************************************************************** - * DS-MRR implementation + * Mrr_*_reader classes (building blocks for DS-MRR) + ***************************************************************************/ + +int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Key_parameters *key_par_arg, + Lifo_buffer *key_buffer_arg, + Buffer_manager *buf_manager_arg) +{ + HANDLER_BUFFER no_buffer = {NULL, NULL, NULL}; + file= h_arg; + return file->handler::multi_range_read_init(seq_funcs, seq_init_param, + n_ranges, mode, &no_buffer); +} + + +int Mrr_simple_index_reader::get_next(char **range_info) +{ + int res; + while (!(res= file->handler::multi_range_read_next(range_info))) + { + KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range; + if (!file->mrr_funcs.skip_index_tuple || + !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr)) + break; + } + return res; +} + + +/** + @brief Get next index record + + @param range_info OUT identifier of range that the returned record belongs to + + @note + We actually iterate over nested sequences: + - an ordered sequence of groups of identical keys + - each key group has key value, which has multiple matching records + - thus, each record matches all members of the key group + + @retval 0 OK, next record was successfully read + @retval HA_ERR_END_OF_FILE End of records + @retval Other Some other error +*/ + +int Mrr_ordered_index_reader::get_next(char **range_info) +{ + int res; + DBUG_ENTER("Mrr_ordered_index_reader::get_next"); + + for(;;) + { + if (!scanning_key_val_iter) + { + while ((res= kv_it.init(this))) + { + if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE)) + DBUG_RETURN(res); /* Some fatal error */ + + if (key_buffer->is_empty()) + { + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + } + scanning_key_val_iter= TRUE; + } + + if ((res= kv_it.get_next())) + { + scanning_key_val_iter= FALSE; + if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE)) + DBUG_RETURN(res); + kv_it.move_to_next_key_value(); + continue; + } + char *range_info; + memcpy(&range_info, cur_range_info, sizeof(char*)); + if (!skip_index_tuple(range_info) && + !skip_record(range_info, NULL)) + { + break; + } + /* Go get another (record, range_id) combination */ + } /* while */ + + memcpy(range_info, cur_range_info, sizeof(void*)); + DBUG_RETURN(0); +} + +void Mrr_ordered_index_reader::set_temp_space(uchar *space) +{ + //saved_key_tuple= space; + saved_rowid= space; + have_saved_rowid= FALSE; +} + +void Mrr_ordered_index_reader::interrupt_read() +{ + /* + key_copy(saved_key_tuple, file->get_table()->record[0], + &file->get_table()->key_info[file->active_index], + keypar.key_tuple_length); + */ + /* Save the last rowid */ + memcpy(saved_rowid, file->ref, file->ref_length); + have_saved_rowid= TRUE; +} + +void Mrr_ordered_index_reader::position() +{ + if (have_saved_rowid) + memcpy(file->ref, saved_rowid, file->ref_length); + else + Mrr_index_reader::position(); +} + +void Mrr_ordered_index_reader::resume_read() +{ + /* + key_restore(file->get_table()->record[0], saved_key_tuple, + &file->get_table()->key_info[file->active_index], + keypar.key_tuple_length); + */ +} + + +/** + Fill the buffer with (lookup_tuple, range_id) pairs and sort +*/ + +int Mrr_ordered_index_reader::refill_buffer(bool initial) +{ + KEY_MULTI_RANGE cur_range; + uchar **range_info_ptr= (uchar**)&cur_range.ptr; + uchar *key_ptr; + DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer"); + + DBUG_ASSERT(key_buffer->is_empty()); + + if (source_exhausted) + DBUG_RETURN(HA_ERR_END_OF_FILE); + + //if (know_key_tuple_params) + { + buf_manager->reset_buffer_sizes(buf_manager->arg); + key_buffer->reset(); + key_buffer->setup_writing(&key_ptr, keypar.key_size_in_keybuf, + is_mrr_assoc? (uchar**)&range_info_ptr : NULL, + sizeof(uchar*)); + } + + while (key_buffer->can_write() && + !(source_exhausted= (bool)mrr_funcs.next(mrr_iter, &cur_range))) + { + DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); + + /* Put key, or {key, range_id} pair into the buffer */ + key_ptr= (keypar.use_key_pointers)? (uchar*)&cur_range.start_key.key : + (uchar*)cur_range.start_key.key; + + key_buffer->write(); + } + + /* Force get_next() to start with kv_it.init() call: */ + scanning_key_val_iter= FALSE; + + if (source_exhausted && key_buffer->is_empty()) + DBUG_RETURN(HA_ERR_END_OF_FILE); + + key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)? + (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse : + (qsort2_cmp)Mrr_ordered_index_reader::compare_keys, + this); + DBUG_RETURN(0); +} + + +int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, + uint mode, Key_parameters *key_par_arg, + Lifo_buffer *key_buffer_arg, + Buffer_manager *buf_manager_arg) +{ + file= h_arg; + key_buffer= key_buffer_arg; + buf_manager= buf_manager_arg; + keypar= *key_par_arg; + + KEY *key_info= &file->get_table()->key_info[file->active_index]; + keypar.index_ranges_unique= test(key_info->flags & HA_NOSAME && + key_info->key_parts == + my_count_bits(keypar.key_tuple_map)); + + mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); + is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); + mrr_funcs= *seq_funcs; + source_exhausted= FALSE; + /* + Short: don't do identical key handling when we have a pushed index + condition. + + Long: In order to check pushed index condition, we need to have both + index tuple table->record[0] and range_id. + + Key_value_records_iterator has special handling for case when we have + multiple (key_value, range_id) pairs with the same key_value. In that + case it will make an index lookup only for the first such element, + for subsequent elements it will only return the new range_id. + + The problem here is that file->table->record[0] is shared with the part + that does full record retrieval with rnd_pos() calls, and if we have the + following scenario: + + 1. We scan ranges {(key_value, range_id1), (key_value, range_id2)} + 2. Iterator makes a lookup with key_value, produces the (index_tuple, + range_id1) pair. Index tuple is read into table->record[0], which + allows us to check index condition. + 3. At this point, we figure that key buffer is full, so we sort it, + and return control to Mrr_ordered_rndpos_reader. + 3.1 Mrr_ordered_rndpos_reader gets rowids and makes rnd_pos() calls, which + puts some arbitrary data into table->record[0] in the process. + 3.2 We ask the iterator for the next (rowid, range_id) pair. The iterator + puts in range_id2, and that shuld be sufficient (this is identical key + handling at work) + However, index tuple in table->record[0] has been destroyed and we + can't check index conditon for (index_tuple, range_id2) now. + + TODO: It is possible to support identical key handling and index condition + pushdown, working together (one possible solution is to save/restore the + contents of table->record[0]). We will probably implement that. + + */ + disallow_identical_key_handling= test(mrr_funcs.skip_index_tuple); + /*bzero(saved_key_tuple, keypar.key_tuple_length);*/ + have_saved_rowid= FALSE; + return 0; +} + + +static int rowid_cmp_reverse(void *file, uchar *a, uchar *b) +{ + return - ((handler*)file)->cmp_ref(a, b); +} + + +int Mrr_ordered_rndpos_reader::init(handler *h_arg, + Mrr_index_reader *index_reader_arg, + uint mode, + Lifo_buffer *buf) +{ + file= h_arg; + index_reader= index_reader_arg; + rowid_buffer= buf; + is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); + index_reader_exhausted= FALSE; + index_reader_needs_refill= TRUE; + return 0; +} + + +/** + DS-MRR: Fill and sort the rowid buffer + + Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into + buffer. When the buffer is full or scan is completed, sort the buffer by + rowid and return. + + When this function returns, either rowid buffer is not empty, or the source + of lookup keys (i.e. ranges) is exhaused. + + @retval 0 OK, the next portion of rowids is in the buffer, + properly ordered + @retval other Error +*/ + +int Mrr_ordered_rndpos_reader::refill_buffer(bool initial) +{ + int res; + DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer"); + + if (index_reader_exhausted) + DBUG_RETURN(HA_ERR_END_OF_FILE); + + while (initial || index_reader_needs_refill || + (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE) + { + if ((res= index_reader->refill_buffer(initial))) + { + if (res == HA_ERR_END_OF_FILE) + index_reader_exhausted= TRUE; + break; + } + initial= FALSE; + index_reader_needs_refill= FALSE; + } + DBUG_RETURN(res); +} + + +void Mrr_index_reader::position() +{ + file->position(file->get_table()->record[0]); +} + + +/* + @brief Try to refill the rowid buffer without calling + index_reader->refill_buffer(). +*/ + +int Mrr_ordered_rndpos_reader::refill_from_index_reader() +{ + char *range_info; + uchar **range_info_ptr= (uchar**)&range_info; + int res; + DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader"); + + DBUG_ASSERT(rowid_buffer->is_empty()); + index_rowid= index_reader->get_rowid_ptr(); + rowid_buffer->reset(); + rowid_buffer->setup_writing(&index_rowid, file->ref_length, + is_mrr_assoc? (uchar**)&range_info_ptr: NULL, + sizeof(void*)); + + last_identical_rowid= NULL; + + index_reader->resume_read(); + while (rowid_buffer->can_write()) + { + res= index_reader->get_next(&range_info); + + if (res) + { + if (res != HA_ERR_END_OF_FILE) + DBUG_RETURN(res); + index_reader_needs_refill=TRUE; + break; + } + + index_reader->position(); + + /* Put rowid, or {rowid, range_id} pair into the buffer */ + rowid_buffer->write(); + } + + index_reader->interrupt_read(); + /* Sort the buffer contents by rowid */ + rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file); + + rowid_buffer->setup_reading(&rowid, file->ref_length, + is_mrr_assoc? (uchar**)&rowids_range_id: NULL, + sizeof(void*)); + DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0); +} + + +/* + Get the next {record, range_id} using ordered array of rowid+range_id pairs + + @note + Since we have sorted rowids, we try not to make multiple rnd_pos() calls + with the same rowid value. +*/ + +int Mrr_ordered_rndpos_reader::get_next(char **range_info) +{ + int res; + + /* + First, check if rowid buffer has elements with the same rowid value as + the previous. + */ + while (last_identical_rowid) + { + /* + Current record (the one we've returned in previous call) was obtained + from a rowid that matched multiple range_ids. Return this record again, + with next matching range_id. + */ + (void)rowid_buffer->read(); + + if (rowid == last_identical_rowid) + last_identical_rowid= NULL; /* reached the last of identical rowids */ + + if (!is_mrr_assoc) + return 0; + + memcpy(range_info, rowids_range_id, sizeof(uchar*)); + if (!index_reader->skip_record((char*)*range_info, rowid)) + return 0; + } + + /* + Ok, last_identical_rowid==NULL, it's time to read next different rowid + value and get record for it. + */ + for(;;) + { + /* Return eof if there are no rowids in the buffer after re-fill attempt */ + if (rowid_buffer->read()) + return HA_ERR_END_OF_FILE; + + if (is_mrr_assoc) + { + memcpy(range_info, rowids_range_id, sizeof(uchar*)); + + if (index_reader->skip_record(*range_info, rowid)) + continue; + } + + res= file->ha_rnd_pos(file->get_table()->record[0], rowid); + + if (res == HA_ERR_RECORD_DELETED) + { + /* not likely to get this code with current storage engines, but still */ + continue; + } + + if (res) + return res; /* Some fatal error */ + + break; /* Got another record */ + } + + /* + Check if subsequent buffer elements have the same rowid value as this + one. If yes, remember this fact so that we don't make any more rnd_pos() + calls with this value. + */ + uchar *cur_rowid= rowid; + /* + Note: this implies that SQL layer doesn't touch table->record[0] + between calls. + */ + Lifo_buffer_iterator it; + it.init(rowid_buffer); + while (!it.read()) // reads to (rowid, ...) + { + if (file->cmp_ref(rowid, cur_rowid)) + break; + last_identical_rowid= rowid; + } + return 0; +} + + +/**************************************************************************** + * Top-level DS-MRR implementation functions (the ones called by storage engine) ***************************************************************************/ /** @@ -286,7 +740,7 @@ scan_it_again: Initialize and start the MRR scan. Depending on the mode parameter, this may use default or DS-MRR implementation. - @param h Table handler to be used + @param h_arg Table handler to be used @param key Index to be used @param seq_funcs Interval sequence enumeration functions @param seq_init_param Interval sequence enumeration parameter @@ -302,279 +756,563 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, void *seq_init_param, uint n_ranges, uint mode, HANDLER_BUFFER *buf) { - uint elem_size; - Item *pushed_cond= NULL; - handler *new_h2= 0; + THD *thd= current_thd; + int res; + Key_parameters keypar; + uint key_buff_elem_size; + handler *h_idx; + Mrr_ordered_rndpos_reader *disk_strategy= NULL; + bool do_sort_keys= FALSE; DBUG_ENTER("DsMrr_impl::dsmrr_init"); /* index_merge may invoke a scan on an object for which dsmrr_info[_const] has not been called, so set the owner handler here as well. */ - h= h_arg; - if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED) + primary_file= h_arg; + is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); + + strategy_exhausted= FALSE; + + /* By default, have do-nothing buffer manager */ + buf_manager.arg= this; + buf_manager.reset_buffer_sizes= do_nothing; + buf_manager.redistribute_buffer_space= do_nothing; + + if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED)) + goto use_default_impl; + + /* + Determine whether we'll need to do key sorting and/or rnd_pos() scan + */ + index_strategy= NULL; + if ((mode & HA_MRR_SINGLE_POINT) && + optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)) { - use_default_impl= TRUE; - const int retval= - h->handler::multi_range_read_init(seq_funcs, seq_init_param, - n_ranges, mode, buf); - DBUG_RETURN(retval); + do_sort_keys= TRUE; + index_strategy= &reader_factory.ordered_index_reader; } - rowids_buf= buf->buffer; + else + index_strategy= &reader_factory.simple_index_reader; - is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); + strategy= index_strategy; + /* + We don't need a rowid-to-rndpos step if + - We're doing a scan on clustered primary key + - [In the future] We're doing an index_only read + */ + DBUG_ASSERT(primary_file->inited == handler::INDEX || + (primary_file->inited == handler::RND && + secondary_file && + secondary_file->inited == handler::INDEX)); + + h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file; + keyno= h_idx->active_index; + + if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered())) + { + strategy= disk_strategy= &reader_factory.ordered_rndpos_reader; + } if (is_mrr_assoc) - status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); - - rowids_buf_end= buf->buffer_end; - elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); - rowids_buf_last= rowids_buf + - ((rowids_buf_end - rowids_buf)/ elem_size)* - elem_size; - rowids_buf_end= rowids_buf_last; + status_var_increment(thd->status_var.ha_multi_range_read_init_count); - /* - There can be two cases: - - This is the first call since index_init(), h2==NULL - Need to setup h2 then. - - This is not the first call, h2 is initalized and set up appropriately. - The caller might have called h->index_init(), need to switch h to - rnd_pos calls. + full_buf= buf->buffer; + full_buf_end= buf->buffer_end; + + if (do_sort_keys) + { + /* Pre-calculate some parameters of key sorting */ + keypar.use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS); + seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length, + &keypar.key_tuple_map); + keypar.key_size_in_keybuf= keypar.use_key_pointers? + sizeof(char*) : keypar.key_tuple_length; + key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*); + + /* Ordered index reader needs some space to store an index tuple */ + if (strategy != index_strategy) + { + if (full_buf_end - full_buf <= (ptrdiff_t)primary_file->ref_length/*keypar.key_tuple_length*/) + goto use_default_impl; + reader_factory.ordered_index_reader.set_temp_space(full_buf); + //full_buf += keypar.key_tuple_length; + full_buf += primary_file->ref_length; + } + } + + if (strategy == index_strategy) + { + /* + Index strategy alone handles the record retrieval. Give all buffer space + to it. Key buffer should have forward orientation so we can return the + end of it. + */ + key_buffer= &forward_key_buf; + key_buffer->set_buffer_space(full_buf, full_buf_end); + + /* Safety: specify that rowid buffer has zero size: */ + rowid_buffer.set_buffer_space(full_buf_end, full_buf_end); + + if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size)) + goto use_default_impl; + + if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges, + mode, &keypar, key_buffer, &buf_manager))) + goto error; + } + else + { + /* We'll have both index and rndpos strategies working together */ + if (do_sort_keys) + { + /* Both strategies will need buffer space, share the buffer */ + if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map)) + goto use_default_impl; + + buf_manager.reset_buffer_sizes= reset_buffer_sizes; + buf_manager.redistribute_buffer_space= redistribute_buffer_space; + } + else + { + /* index strategy doesn't need buffer, give all space to rowids*/ + rowid_buffer.set_buffer_space(full_buf, full_buf_end); + if (!rowid_buffer.have_space_for(primary_file->ref_length + + (int)is_mrr_assoc * sizeof(char*))) + goto use_default_impl; + } + + if ((res= setup_two_handlers())) + goto error; + + if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param, + n_ranges, mode, &keypar, key_buffer, + &buf_manager)) || + (res= disk_strategy->init(primary_file, index_strategy, mode, + &rowid_buffer))) + { + goto error; + } + } + + res= strategy->refill_buffer(TRUE); + if (res) + { + if (res != HA_ERR_END_OF_FILE) + goto error; + strategy_exhausted= TRUE; + } + + /* + If we have scanned through all intervals in *seq, then adjust *buf to + indicate that the remaining buffer space will not be used. */ - if (!h2) +// if (dsmrr_eof) +// buf->end_of_used_area= rowid_buffer.end_of_space(); + + + DBUG_RETURN(0); +error: + close_second_handler(); + /* Safety, not really needed but: */ + strategy= NULL; + DBUG_RETURN(1); + +use_default_impl: + DBUG_ASSERT(primary_file->inited == handler::INDEX); + /* Call correct init function and assign to top level object */ + Mrr_simple_index_reader *s= &reader_factory.simple_index_reader; + res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL, + NULL, NULL); + strategy= s; + DBUG_RETURN(res); +} + + +/* + Whatever the current state is, make it so that we have two handler objects: + - primary_file - initialized for rnd_pos() scan + - secondary_file - initialized for scanning the index specified in + this->keyno + RETURN + 0 OK + HA_XXX Error code +*/ + +int DsMrr_impl::setup_two_handlers() +{ + int res; + THD *thd= primary_file->get_table()->in_use; + DBUG_ENTER("DsMrr_impl::setup_two_handlers"); + if (!secondary_file) { - /* Create a separate handler object to do rndpos() calls. */ - THD *thd= current_thd; + handler *new_h2; + Item *pushed_cond= NULL; + DBUG_ASSERT(primary_file->inited == handler::INDEX); + /* Create a separate handler object to do rnd_pos() calls. */ /* ::clone() takes up a lot of stack, especially on 64 bit platforms. The constant 5 is an empiric result. */ if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) DBUG_RETURN(1); - DBUG_ASSERT(h->active_index != MAX_KEY); - uint mrr_keyno= h->active_index; - /* Create a separate handler object to do rndpos() calls. */ - if (!(new_h2= h->clone(thd->mem_root)) || + /* Create a separate handler object to do rnd_pos() calls. */ + if (!(new_h2= primary_file->clone(thd->mem_root)) || new_h2->ha_external_lock(thd, F_RDLCK)) { delete new_h2; DBUG_RETURN(1); } - if (mrr_keyno == h->pushed_idx_cond_keyno) - pushed_cond= h->pushed_idx_cond; - + if (keyno == primary_file->pushed_idx_cond_keyno) + pushed_cond= primary_file->pushed_idx_cond; + + Mrr_reader *save_strategy= strategy; + strategy= NULL; /* Caution: this call will invoke this->dsmrr_close(). Do not put the - created secondary table handler into this->h2 or it will delete it. + created secondary table handler new_h2 into this->secondary_file or it + will delete it. Also, save the picked strategy */ - if (h->ha_index_end()) - { - h2=new_h2; + res= primary_file->ha_index_end(); + + strategy= save_strategy; + secondary_file= new_h2; + + if (res || (res= (primary_file->ha_rnd_init(FALSE)))) goto error; - } - h2= new_h2; /* Ok, now can put it into h2 */ table->prepare_for_position(); - h2->extra(HA_EXTRA_KEYREAD); - - if (h2->ha_index_init(mrr_keyno, FALSE)) + secondary_file->extra(HA_EXTRA_KEYREAD); + secondary_file->mrr_iter= primary_file->mrr_iter; + + if ((res= secondary_file->ha_index_init(keyno, FALSE))) goto error; - use_default_impl= FALSE; if (pushed_cond) - h2->idx_cond_push(mrr_keyno, pushed_cond); + secondary_file->idx_cond_push(keyno, pushed_cond); } else { + DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX); /* We get here when the access alternates betwen MRR scan(s) and non-MRR scans. - Calling h->index_end() will invoke dsmrr_close() for this object, - which will delete h2. We need to keep it, so save put it away and dont + Calling primary_file->index_end() will invoke dsmrr_close() for this object, + which will delete secondary_file. We need to keep it, so put it away and dont let it be deleted: */ - handler *save_h2= h2; - h2= NULL; - int res= (h->inited == handler::INDEX && h->ha_index_end()); - h2= save_h2; - use_default_impl= FALSE; - if (res) + if (primary_file->inited == handler::INDEX) + { + handler *save_h2= secondary_file; + Mrr_reader *save_strategy= strategy; + secondary_file= NULL; + strategy= NULL; + res= primary_file->ha_index_end(); + secondary_file= save_h2; + strategy= save_strategy; + if (res) + goto error; + } + if ((primary_file->inited != handler::RND) && primary_file->ha_rnd_init(FALSE)) goto error; } + DBUG_RETURN(0); - if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, - mode, buf) || - dsmrr_fill_buffer()) - { - goto error; - } - /* - If the above call has scanned through all intervals in *seq, then - adjust *buf to indicate that the remaining buffer space will not be used. - */ - if (dsmrr_eof) - buf->end_of_used_area= rowids_buf_last; +error: + DBUG_RETURN(res); +} - /* - h->inited == INDEX may occur when 'range checked for each record' is - used. - */ - if ((h->inited != handler::RND) && - ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) || - (h->ha_rnd_init(FALSE)))) - goto error; - use_default_impl= FALSE; - h->mrr_funcs= *seq_funcs; - - DBUG_RETURN(0); -error: - h2->ha_index_or_rnd_end(); - h2->ha_external_lock(current_thd, F_UNLCK); - h2->close(); - delete h2; - h2= NULL; - DBUG_RETURN(1); +void DsMrr_impl::close_second_handler() +{ + if (secondary_file) + { + secondary_file->ha_index_or_rnd_end(); + secondary_file->ha_external_lock(current_thd, F_UNLCK); + secondary_file->close(); + delete secondary_file; + secondary_file= NULL; + } } void DsMrr_impl::dsmrr_close() { DBUG_ENTER("DsMrr_impl::dsmrr_close"); - if (h2) + close_second_handler(); + strategy= NULL; + DBUG_VOID_RETURN; +} + + +/* + my_qsort2-compatible static member function to compare key tuples +*/ + +int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg, + uchar* key2_arg) +{ + Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg; + TABLE *table= reader->file->get_table(); + KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part; + uchar *key1, *key2; + + if (reader->keypar.use_key_pointers) { - h2->ha_index_or_rnd_end(); - h2->ha_external_lock(current_thd, F_UNLCK); - h2->close(); - delete h2; - h2= NULL; + /* the buffer stores pointers to keys, get to the keys */ + memcpy(&key1, key1_arg, sizeof(char*)); + memcpy(&key2, key2_arg, sizeof(char*)); } - use_default_impl= TRUE; - DBUG_VOID_RETURN; + else + { + key1= key1_arg; + key2= key2_arg; + } + + return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length); } -static int rowid_cmp(void *h, uchar *a, uchar *b) +int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1, + uchar* key2) { - return ((handler*)h)->cmp_ref(a, b); + return -compare_keys(arg, key1, key2); } /** - DS-MRR: Fill the buffer with rowids and sort it by rowid + Set the buffer space to be shared between rowid and key buffer + + @return FALSE ok + @return TRUE There is so little buffer space that we won't be able to use + the strategy. + This happens when we don't have enough space for one rowid + element and one key element so this is mainly targeted at + testing. +*/ - {This is an internal function of DiskSweep MRR implementation} - Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into - buffer. When the buffer is full or scan is completed, sort the buffer by - rowid and return. +bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf, + key_part_map key_tuple_map) +{ + long key_buff_elem_size= key_size_in_keybuf + + (int)is_mrr_assoc * sizeof(void*); - The function assumes that rowids buffer is empty when it is invoked. + KEY *key_info= &primary_file->get_table()->key_info[keyno]; + /* + Ok if we got here we need to allocate one part of the buffer + for keys and another part for rowids. + */ + ulonglong rowid_buf_elem_size= primary_file->ref_length + + (int)is_mrr_assoc * sizeof(char*); - @param h Table handler + /* + Use rec_per_key statistics as a basis to find out how many rowids + we'll get for each key value. + TODO: what should be the default value to use when there is no + statistics? + */ + uint parts= my_count_bits(key_tuple_map); + ulong rpc; + ulonglong rowids_size= rowid_buf_elem_size; + if ((rpc= key_info->rec_per_key[parts - 1])) + rowids_size= rowid_buf_elem_size * rpc; + + double fraction_for_rowids= + (ulonglong2double(rowids_size) / + (ulonglong2double(rowids_size) + key_buff_elem_size)); + + size_t bytes_for_rowids= + round(fraction_for_rowids * (full_buf_end - full_buf)); + + long bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids; - @retval 0 OK, the next portion of rowids is in the buffer, - properly ordered - @retval other Error -*/ + if (bytes_for_keys < key_buff_elem_size + 1) + { + long add= key_buff_elem_size + 1 - bytes_for_keys; + bytes_for_keys= key_buff_elem_size + 1; + bytes_for_rowids -= add; + } -int DsMrr_impl::dsmrr_fill_buffer() + if (bytes_for_rowids < rowid_buf_elem_size + 1) + { + long add= rowid_buf_elem_size + 1 - bytes_for_rowids; + bytes_for_rowids= rowid_buf_elem_size + 1; + bytes_for_keys -= add; + } + + rowid_buffer_end= full_buf + bytes_for_rowids; + rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end); + key_buffer= &backward_key_buf; + key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end); + + if (!key_buffer->have_space_for(key_buff_elem_size) || + !rowid_buffer.have_space_for(rowid_buf_elem_size)) + return TRUE; /* Failed to provide minimum space for one of the buffers */ + + return FALSE; +} + + +void DsMrr_impl::do_nothing(void *dsmrr_arg) { - char *range_info; - int res; - DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer"); + /* Do nothing */ +} - rowids_buf_cur= rowids_buf; - while ((rowids_buf_cur < rowids_buf_end) && - !(res= h2->handler::multi_range_read_next(&range_info))) - { - KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range; - if (h2->mrr_funcs.skip_index_tuple && - h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr)) - continue; - - /* Put rowid, or {rowid, range_id} pair into the buffer */ - h2->position(table->record[0]); - memcpy(rowids_buf_cur, h2->ref, h2->ref_length); - rowids_buf_cur += h2->ref_length; - if (is_mrr_assoc) - { - memcpy(rowids_buf_cur, &range_info, sizeof(void*)); - rowids_buf_cur += sizeof(void*); - } - } +void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg) +{ + DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg; + dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf, + dsmrr->rowid_buffer_end); + dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end, + dsmrr->full_buf_end); +} - if (res && res != HA_ERR_END_OF_FILE) - DBUG_RETURN(res); - dsmrr_eof= test(res == HA_ERR_END_OF_FILE); - /* Sort the buffer contents by rowid */ - uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); - uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; - - my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp, - (void*)h); - rowids_buf_last= rowids_buf_cur; - rowids_buf_cur= rowids_buf; - DBUG_RETURN(0); +/* + Take unused space from the key buffer and give it to the rowid buffer +*/ + +void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg) +{ + DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg; + uchar *unused_start, *unused_end; + dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end); + dsmrr->rowid_buffer.grow(unused_start, unused_end); } -/** - DS-MRR implementation: multi_range_read_next() function +/* + @brief Initialize the iterator + + @note + Initialize the iterator to produce matches for the key of the first element + in owner_arg->key_buffer + + @retval 0 OK + @retval HA_ERR_END_OF_FILE Either the owner->key_buffer is empty or + no matches for the key we've tried (check + key_buffer->is_empty() to tell these apart) + @retval other code Fatal error */ -int DsMrr_impl::dsmrr_next(char **range_info) +int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg) { int res; - uchar *cur_range_info= 0; - uchar *rowid; + owner= owner_arg; + + identical_key_it.init(owner->key_buffer); + /* Get the first pair into (cur_index_tuple, cur_range_info) */ + owner->key_buffer->setup_reading(&cur_index_tuple, + owner->keypar.key_size_in_keybuf, + owner->is_mrr_assoc? + (uchar**)&owner->cur_range_info: NULL, + sizeof(void*)); + + if (identical_key_it.read()) + return HA_ERR_END_OF_FILE; + + uchar *key_in_buf= cur_index_tuple; - if (use_default_impl) - return h->handler::multi_range_read_next(range_info); + last_identical_key_ptr= cur_index_tuple; + if (owner->keypar.use_key_pointers) + memcpy(&cur_index_tuple, key_in_buf, sizeof(char*)); - do + /* Check out how many more identical keys are following */ + uchar *save_cur_index_tuple= cur_index_tuple; + while (!identical_key_it.read()) + { + if (owner->disallow_identical_key_handling || + Mrr_ordered_index_reader::compare_keys(owner, key_in_buf, + cur_index_tuple)) + break; + last_identical_key_ptr= cur_index_tuple; + } + identical_key_it.init(owner->key_buffer); + cur_index_tuple= save_cur_index_tuple; + res= owner->file->ha_index_read_map(owner->file->get_table()->record[0], + cur_index_tuple, + owner->keypar.key_tuple_map, + HA_READ_KEY_EXACT); + + if (res) { - if (rowids_buf_cur == rowids_buf_last) + /* Failed to find any matching records */ + move_to_next_key_value(); + return res; + } + owner->have_saved_rowid= FALSE; + get_next_row= FALSE; + return 0; +} + + +int Key_value_records_iterator::get_next() +{ + int res; + + if (get_next_row) + { + if (owner->keypar.index_ranges_unique) { - if (dsmrr_eof) - { - res= HA_ERR_END_OF_FILE; - goto end; - } - res= dsmrr_fill_buffer(); - if (res) - goto end; + /* We're using a full unique key, no point to call index_next_same */ + return HA_ERR_END_OF_FILE; } - - /* return eof if there are no rowids in the buffer after re-fill attempt */ - if (rowids_buf_cur == rowids_buf_last) + + handler *h= owner->file; + if ((res= h->ha_index_next_same(h->get_table()->record[0], + cur_index_tuple, + owner->keypar.key_tuple_length))) { - res= HA_ERR_END_OF_FILE; - goto end; + /* It's either HA_ERR_END_OF_FILE or some other error */ + return res; } - rowid= rowids_buf_cur; + identical_key_it.init(owner->key_buffer); + owner->have_saved_rowid= FALSE; + get_next_row= FALSE; + } - if (is_mrr_assoc) - memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**)); + identical_key_it.read(); /* This gets us next range_id */ + if (!last_identical_key_ptr || (cur_index_tuple == last_identical_key_ptr)) + { + /* + We've reached the last of the identical keys that current record is a + match for. Set get_next_row=TRUE so that we read the next index record + on the next call to this function. + */ + get_next_row= TRUE; + } + return 0; +} - rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc); - if (h2->mrr_funcs.skip_record && - h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) - continue; - res= h->ha_rnd_pos(table->record[0], rowid); - break; - } while (true); - - if (is_mrr_assoc) + +void Key_value_records_iterator::move_to_next_key_value() +{ + while (!owner->key_buffer->read() && + (cur_index_tuple != last_identical_key_ptr)) {} +} + + +/** + DS-MRR implementation: multi_range_read_next() function. + + Calling convention is like multi_range_read_next() has. +*/ + +int DsMrr_impl::dsmrr_next(char **range_info) +{ + int res; + if (strategy_exhausted) + return HA_ERR_END_OF_FILE; + + while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE) { - memcpy(range_info, rowid + h->ref_length, sizeof(void*)); + if ((res= strategy->refill_buffer(FALSE))) + break; /* EOF or error */ } -end: return res; } @@ -582,7 +1320,8 @@ end: /** DS-MRR implementation: multi_range_read_info() function */ -ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, +ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, + uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost) { ha_rows res; @@ -590,8 +1329,9 @@ ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint def_bufsz= *bufsz; /* Get cost/flags/mem_usage of default MRR implementation */ - res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz, - &def_flags, cost); + res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows, + key_parts, &def_bufsz, + &def_flags, cost); DBUG_ASSERT(!res); if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || @@ -623,9 +1363,11 @@ ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, uint def_flags= *flags; uint def_bufsz= *bufsz; /* Get cost/flags/mem_usage of default MRR implementation */ - rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param, - n_ranges, &def_bufsz, - &def_flags, cost); + rows= primary_file->handler::multi_range_read_info_const(keyno, seq, + seq_init_param, + n_ranges, + &def_bufsz, + &def_flags, cost); if (rows == HA_POS_ERROR) { /* Default implementation can't perform MRR scan => we can't either */ @@ -683,7 +1425,28 @@ bool key_uses_partial_cols(TABLE *table, uint keyno) return FALSE; } -/** + +/* + Check if key/flags allow DS-MRR/CPK strategy to be used + + @param thd + @param keyno Index that will be used + @param mrr_flags + + @retval TRUE DS-MRR/CPK should be used + @retval FALSE Otherwise +*/ + +bool DsMrr_impl::check_cpk_scan(THD *thd, uint keyno, uint mrr_flags) +{ + return test((mrr_flags & HA_MRR_SINGLE_POINT) && + keyno == table->s->primary_key && + primary_file->primary_key_is_clustered() && + optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)); +} + + +/* DS-MRR Internals: Choose between Default MRR implementation and DS-MRR Make the choice between using Default MRR implementation and DS-MRR. @@ -706,22 +1469,26 @@ bool key_uses_partial_cols(TABLE *table, uint keyno) @retval FALSE DS-MRR implementation should be used */ + bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, COST_VECT *cost) { COST_VECT dsmrr_cost; bool res; THD *thd= current_thd; + + bool doing_cpk_scan= check_cpk_scan(thd, keyno, *flags); + bool using_cpk= test(keyno == table->s->primary_key && + primary_file->primary_key_is_clustered()); if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY || - (keyno == table->s->primary_key && h->primary_key_is_clustered()) || - key_uses_partial_cols(table, keyno)) + (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(table, keyno)) { /* Use the default implementation */ *flags |= HA_MRR_USE_DEFAULT_IMPL; return TRUE; } - - uint add_len= table->key_info[keyno].key_length + h->ref_length; + + uint add_len= table->key_info[keyno].key_length + primary_file->ref_length; *bufsz -= add_len; if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost)) return TRUE; @@ -744,6 +1511,10 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, *flags &= ~HA_MRR_SORTED; /* We will return unordered output */ *cost= dsmrr_cost; res= FALSE; + + if ((*flags & HA_MRR_SINGLE_POINT) && + optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)) + *flags |= HA_MRR_MATERIALIZED_KEYS; } else { @@ -779,7 +1550,8 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, uint n_full_steps; double index_read_cost; - elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION)); + elem_size= primary_file->ref_length + + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION)); max_buff_entries = *buffer_size / elem_size; if (!max_buff_entries) @@ -807,7 +1579,7 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, cost->zero(); *buffer_size= max(*buffer_size, (size_t)(1.2*rows_in_last_step) * elem_size + - h->ref_length + table->key_info[keynr].key_length); + primary_file->ref_length + table->key_info[keynr].key_length); } COST_VECT last_step_cost; @@ -820,7 +1592,7 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, cost->mem_cost= (double)rows_in_last_step * elem_size; /* Total cost of all index accesses */ - index_read_cost= h->keyread_time(keynr, 1, rows); + index_read_cost= primary_file->keyread_time(keynr, 1, rows); cost->add_io(index_read_cost, 1 /* Random seeks */); return FALSE; } @@ -828,17 +1600,14 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, /* Get cost of one sort-and-sweep step + + It consists of two parts: + - sort an array of #nrows ROWIDs using qsort + - read #nrows records from table in a sweep. - SYNOPSIS - get_sort_and_sweep_cost() - table Table being accessed - nrows Number of rows to be sorted and retrieved - cost OUT The cost - - DESCRIPTION - Get cost of these operations: - - sort an array of #nrows ROWIDs using qsort - - read #nrows records from table in a sweep. + @param table Table being accessed + @param nrows Number of rows to be sorted and retrieved + @param cost OUT The cost of scan */ static |