summaryrefslogtreecommitdiff
path: root/sql/multi_range_read.cc
diff options
context:
space:
mode:
authorSergey Petrunya <psergey@askmonty.org>2010-11-01 18:49:59 +0300
committerSergey Petrunya <psergey@askmonty.org>2010-11-01 18:49:59 +0300
commit6c15806b68a668b7039d8e5b5f2c6623c50bff5f (patch)
treed3cc5d6d70ecf2e90336b414c931c403a3461249 /sql/multi_range_read.cc
parentd48a8b60345c5b5f95c2ce590f7032a7c9f87c4b (diff)
parentb76a8595c611bedf512b19a7c4ccc260f0d0a8f6 (diff)
downloadmariadb-git-6c15806b68a668b7039d8e5b5f2c6623c50bff5f.tar.gz
MWL#121-124 DS-MRR support for key-ordered retrieval, etc
- Merge into 5.3-main
Diffstat (limited to 'sql/multi_range_read.cc')
-rw-r--r--sql/multi_range_read.cc1077
1 files changed, 887 insertions, 190 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index 53699c47688..f577d4598e3 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1,4 +1,5 @@
#include "mysql_priv.h"
+#include <my_bit.h>
#include "sql_select.h"
/****************************************************************************
@@ -136,10 +137,16 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
*/
ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
- uint *bufsz, uint *flags, COST_VECT *cost)
+ uint key_parts, uint *bufsz,
+ uint *flags, COST_VECT *cost)
{
- *bufsz= 0; /* Default implementation doesn't need a buffer */
+ /*
+ Currently we expect this function to be called only in preparation of scan
+ with HA_MRR_SINGLE_POINT property.
+ */
+ DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
+ *bufsz= 0; /* Default implementation doesn't need a buffer */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
cost->zero();
@@ -207,7 +214,6 @@ handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
DBUG_RETURN(0);
}
-
/**
Get next record in MRR scan
@@ -223,7 +229,7 @@ handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
int handler::multi_range_read_next(char **range_info)
{
- int UNINIT_VAR(result);
+ int result= HA_ERR_END_OF_FILE;
int range_res;
DBUG_ENTER("handler::multi_range_read_next");
@@ -276,6 +282,452 @@ scan_it_again:
DBUG_RETURN(result);
}
+
+/***** MRR_impl classes ****************************************************/
+
+int Mrr_simple_index_reader::get_next(char **range_info)
+{
+ while (!(res= h->handler::multi_range_read_next(range_info)))
+ {
+ KEY_MULTI_RANGE *curr_range= &h->handler::mrr_cur_range;
+ if (!h->mrr_funcs.skip_index_tuple ||
+ !h->mrr_funcs.skip_index_tuple(h->mrr_iter, curr_range->ptr))
+ break;
+ }
+ return res;
+}
+
+int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+ void *seq_init_param, uint n_ranges,
+ uint mode, Buffer_manager *buf_manager_arg)
+{
+ HANDLER_BUFFER no_buffer = {NULL, NULL, NULL};
+ h= h_arg;
+ res= 0;
+ return h->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+ mode, &no_buffer);
+}
+
+/**
+ DS-MRR/CPK: multi_range_read_next() function
+
+ @param range_info OUT identifier of range that the returned record belongs to
+
+ @note
+ This function walks over key buffer and does index reads, i.e. it produces
+ {current_record, range_id} pairs.
+
+ The function has the same call contract like multi_range_read_next()'s.
+
+ We actually iterate over nested sequences:
+ - a disjoint sequence of index ranges
+ - each range has multiple records
+ - each record goes into multiple identical ranges.
+
+ @retval 0 OK, next record was successfully read
+ @retval HA_ERR_END_OF_FILE End of records
+ @retval Other Some other error
+*/
+
+int Mrr_ordered_index_reader::get_next(char **range_info_arg)
+{
+ DBUG_ENTER("Mrr_ordered_index_reader::get_next");
+
+ if (!know_key_tuple_params) /* We're in startup phase */
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+ while (1)
+ {
+ bool have_record= FALSE;
+ if (scanning_key_val_iter)
+ {
+ if (kv_it.get_next())
+ {
+ kv_it.close();
+ scanning_key_val_iter= FALSE;
+ }
+ else
+ have_record= TRUE;
+ }
+ else
+ {
+ while (kv_it.init(this))
+ {
+ if (key_buffer->is_empty())
+ {
+ /*if (auto_refill)
+ {
+ int res;
+ if ((res= refill_buffer()))
+ DBUG_RETURN(res);
+ if (key_buffer->is_empty())
+ {
+ index_scan_eof= TRUE;
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ }
+ else
+ */
+ {
+ /* Buffer refills are managed by somebody else for us */
+ index_scan_eof= TRUE;
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+ }
+ }
+ }
+ scanning_key_val_iter= TRUE;
+ }
+
+ if (have_record &&
+ (!mrr_funcs.skip_index_tuple ||
+ !mrr_funcs.skip_index_tuple(mrr_iter, *(char**)cur_range_info))
+ &&
+ (!mrr_funcs.skip_record ||
+ !mrr_funcs.skip_record(mrr_iter, *(char**)cur_range_info, NULL)))
+ {
+ break;
+ }
+ /* Go get another (record, range_id) combination */
+ } /* while */
+
+ memcpy(range_info_arg, cur_range_info, sizeof(void*));
+ DBUG_RETURN(0);
+}
+
+
+/**
+ DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
+
+ Enumerate the input range (=key) sequence, fill the key buffer with
+ (lookup_key, range_id) pairs and sort it.
+
+ When this function returns, either
+ - key buffer is non-empty, or
+ - key buffer is empty and source range sequence is exhausted
+
+ @note
+ dsmrr_eof is set to indicate whether we've exhausted the list of ranges
+ we're scanning.
+*/
+
+int Mrr_ordered_index_reader::refill_buffer()
+{
+ int res;
+ KEY_MULTI_RANGE cur_range;
+ uchar **range_info_ptr= (uchar**)&cur_range.ptr;
+ uchar *key_ptr;
+ DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");
+
+ DBUG_ASSERT(!know_key_tuple_params || key_buffer->is_empty());
+ if (know_key_tuple_params)
+ {
+ buf_manager->reset_buffer_sizes();
+ key_buffer->reset();
+ key_buffer->setup_writing(&key_ptr, keypar.key_size_in_keybuf,
+ is_mrr_assoc? (uchar**)&range_info_ptr : NULL,
+ sizeof(uchar*));
+ }
+#if 0
+
+ if (know_key_tuple_params)
+ {
+ if (do_rndpos_scan && rowid_buffer.is_empty())
+ {
+ /*
+ We're using two buffers and both of them are empty now. Restore the
+ original sizes
+ */
+ rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+ key_buffer= &backward_key_buf;
+ key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
+ }
+ }
+ is all of the ifdef-ed stuff is handled above?
+#endif
+ while ((!know_key_tuple_params || key_buffer->can_write()) &&
+ !(res= mrr_funcs.next(mrr_iter, &cur_range)))
+ {
+ DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
+
+ if (!know_key_tuple_params)
+ {
+ /* This only happens when we've just started filling the buffer */
+ key_range *sample_key= &cur_range.start_key;
+ know_key_tuple_params= TRUE;
+ keypar.key_tuple_length= sample_key->length;
+ keypar.key_tuple_map= sample_key->keypart_map;
+ keypar.key_size_in_keybuf= keypar.use_key_pointers ? sizeof(char*) : keypar.key_tuple_length;
+ KEY *key_info= &h->get_table()->key_info[h->active_index];
+ keypar.index_ranges_unique= test(key_info->flags & HA_NOSAME &&
+ key_info->key_parts ==
+ my_count_bits(sample_key->keypart_map));
+ buf_manager->setup_buffer_sizes(keypar.key_size_in_keybuf, keypar.key_tuple_map);
+ key_buffer= buf_manager->get_key_buffer();
+ key_buffer->setup_writing(&key_ptr, keypar.key_size_in_keybuf,
+ is_mrr_assoc? (uchar**)&range_info_ptr : NULL,
+ sizeof(uchar*));
+ DBUG_ASSERT(key_buffer->can_write());
+ }
+
+ /* Put key, or {key, range_id} pair into the buffer */
+ if (keypar.use_key_pointers)
+ key_ptr=(uchar*) &cur_range.start_key.key;
+ else
+ key_ptr=(uchar*) cur_range.start_key.key;
+
+ key_buffer->write();
+ }
+
+ no_more_keys= test(res);
+
+ key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)?
+ (qsort2_cmp)Mrr_ordered_index_reader::key_tuple_cmp_reverse :
+ (qsort2_cmp)Mrr_ordered_index_reader::key_tuple_cmp,
+ (void*)this);
+
+ scanning_key_val_iter= FALSE;
+ index_scan_eof= FALSE;
+
+ DBUG_RETURN((no_more_keys && key_buffer->is_empty())? HA_ERR_END_OF_FILE:0);
+}
+
+
+int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+ void *seq_init_param, uint n_ranges,
+ uint mode, Buffer_manager *buf_manager_arg)
+{
+ h= h_arg;
+ mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+ keypar.use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS);
+ is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+ mrr_funcs= *seq_funcs;
+ know_key_tuple_params= FALSE;
+ buf_manager= buf_manager_arg;
+ return 0;
+}
+
+
+static int rowid_cmp_reverse(void *h, uchar *a, uchar *b)
+{
+ return - ((handler*)h)->cmp_ref(a, b);
+}
+
+
+int Mrr_ordered_rndpos_reader::init(handler *h_arg,
+ Mrr_index_reader *index_reader_arg,
+ uint mode,
+ Lifo_buffer *buf)
+{
+ h= h_arg;
+ index_reader= index_reader_arg;
+ rowid_buffer= buf;
+ is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+ //rowid_buff_elem_size= h->ref_length;
+ //if (!(mode & HA_MRR_NO_ASSOCIATION))
+ // rowid_buff_elem_size += sizeof(char*);
+
+ index_reader_exhausted= FALSE;
+ ///int res= index_reader->refill_buffer();
+ ///if (res && res!=HA_ERR_END_OF_FILE)
+ /// return res;
+ return 0;
+}
+
+
+/**
+ DS-MRR: Fill and sort the rowid buffer
+
+ Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
+ buffer. When the buffer is full or scan is completed, sort the buffer by
+ rowid and return.
+
+ When this function returns, either rowid buffer is not empty, or the source
+ of lookup keys (i.e. ranges) is exhaused.
+
+ dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
+ scanning. This function never returns HA_ERR_END_OF_FILE.
+
+ @retval 0 OK, the next portion of rowids is in the buffer,
+ properly ordered
+ @retval other Error
+*/
+
+
+int Mrr_ordered_rndpos_reader::refill_buffer()
+{
+ int res;
+ DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");
+
+ if (index_reader_exhausted)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+ while ((res= refill2() == HA_ERR_END_OF_FILE))
+ {
+ if ((res= index_reader->refill_buffer()))
+ {
+ if (res == HA_ERR_END_OF_FILE)
+ index_reader_exhausted= TRUE;
+ break;
+ }
+ }
+ DBUG_RETURN(res);
+}
+
+
+/* This one refills without calling index_reader->refill_buffer(). */
+int Mrr_ordered_rndpos_reader::refill2()
+{
+ char *range_info;
+ uchar **range_info_ptr= (uchar**)&range_info;
+ int res;
+ DBUG_ENTER("Mrr_ordered_rndpos_reader::refill2");
+
+ DBUG_ASSERT(rowid_buffer->is_empty());
+ index_rowid= index_reader->get_rowid_ptr();
+ rowid_buffer->reset();
+ rowid_buffer->setup_writing(&index_rowid, h->ref_length,
+ is_mrr_assoc? (uchar**)&range_info_ptr: NULL,
+ sizeof(void*));
+
+ last_identical_rowid= NULL;
+
+ while (rowid_buffer->can_write())
+ {
+ res= index_reader->get_next(&range_info);
+
+ if (res)
+ break;
+
+ /* Put rowid, or {rowid, range_id} pair into the buffer */
+ index_reader->h->position(index_reader->h->get_table()->record[0]);
+
+ rowid_buffer->write();
+ }
+
+ /* Sort the buffer contents by rowid */
+ rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)h);
+
+ rowid_buffer->setup_reading(&rowid, h->ref_length,
+ is_mrr_assoc? (uchar**)&rowids_range_id: NULL,
+ sizeof(void*));
+ DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
+}
+
+
+/**
+ DS-MRR implementation: multi_range_read_next() function.
+
+ Calling convention is like multi_range_read_next() has.
+*/
+
+int Mrr_ordered_rndpos_reader::get_next(char **range_info)
+{
+ int res;
+
+ while (last_identical_rowid)
+ {
+ /*
+ Current record (the one we've returned in previous call) was obtained
+ from a rowid that matched multiple range_ids. Return this record again,
+ with next matching range_id.
+ */
+ bool UNINIT_VAR(bres);
+ bres= rowid_buffer->read();
+ DBUG_ASSERT(!bres);
+
+ if (is_mrr_assoc)
+ memcpy(range_info, rowids_range_id, sizeof(uchar*));
+
+ if (rowid == last_identical_rowid)
+ {
+ last_identical_rowid= NULL; /* reached the last of identical rowids */
+ }
+
+ if (!index_reader->skip_record((char*)*range_info, rowid))
+ {
+ return 0;
+ }
+ }
+
+ while (1)
+ {
+#if 0
+ if (rowid_buffer->is_empty()) /* We're out of rowids */
+ {
+ /* First, finish off the sorted keys we have */
+ if (!index_reader->eof())
+ {
+ res= refill_buffer();
+ if (res && res != HA_ERR_END_OF_FILE)
+ return res;
+ }
+
+ if (rowid_buffer->is_empty())
+ {
+ /*
+ Ok neither index_reader nor us have any records. Refill index
+ reader, then refill us.
+ */
+ // TODO: if key buffer is empty, too, redistribute the buffer space.
+ if ((res= index_reader->refill_buffer()) ||
+ (res= refill_buffer()))
+ return res;
+ }
+ }
+#endif
+
+ last_identical_rowid= NULL;
+
+ /* Return eof if there are no rowids in the buffer after re-fill attempt */
+ if (rowid_buffer->read())
+ return HA_ERR_END_OF_FILE;
+
+ if (is_mrr_assoc)
+ {
+ memcpy(range_info, rowids_range_id, sizeof(uchar*));
+ }
+
+ if (index_reader->skip_record(*range_info, rowid))
+ continue;
+
+ res= h->ha_rnd_pos(h->get_table()->record[0], rowid);
+
+ if (res == HA_ERR_RECORD_DELETED)
+ continue;
+
+ /*
+ Check if subsequent buffer elements have the same rowid value as this
+ one. If yes, remember this fact so that we don't make any more rnd_pos()
+ calls with this value.
+ */
+ if (!res)
+ {
+ uchar *cur_rowid= rowid;
+ /*
+ Note: this implies that SQL layer doesn't touch table->record[0]
+ between calls.
+ */
+ Lifo_buffer_iterator it;
+ it.init(rowid_buffer);
+ while (!it.read()) // reads to (rowid, ...)
+ {
+ if (h->cmp_ref(rowid, cur_rowid))
+ break;
+ last_identical_rowid= rowid;
+ }
+ }
+ return 0;
+ }
+
+ return res;
+}
+
+
+
+
+/************ MRR_impl classes end *********************************************/
+
+
/****************************************************************************
* DS-MRR implementation
***************************************************************************/
@@ -302,9 +754,8 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
void *seq_init_param, uint n_ranges, uint mode,
HANDLER_BUFFER *buf)
{
- uint elem_size;
- Item *pushed_cond= NULL;
- handler *new_h2= 0;
+ THD *thd= current_thd;
+ int res;
DBUG_ENTER("DsMrr_impl::dsmrr_init");
/*
@@ -312,50 +763,140 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
has not been called, so set the owner handler here as well.
*/
h= h_arg;
- if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
+ is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+
+ if ((mode & HA_MRR_USE_DEFAULT_IMPL) || (mode & HA_MRR_SORTED))
+ {
+ DBUG_ASSERT(h->inited == handler::INDEX);
+ Mrr_simple_index_reader *s= &strategy_factory.simple_index_reader;
+ res= s->init(h, seq_funcs, seq_init_param, n_ranges, mode, this);
+ strategy= s;
+ DBUG_RETURN(res);
+ }
+
+ /* Neither of strategies used below can handle sorting */
+ DBUG_ASSERT(!(mode & HA_MRR_SORTED));
+
+ /*
+ Determine whether we'll need to do key sorting and/or rnd_pos() scan
+ */
+ index_strategy= NULL;
+ Mrr_ordered_index_reader *ordered_idx_reader= NULL;
+ if ((mode & HA_MRR_SINGLE_POINT) &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
{
- use_default_impl= TRUE;
- const int retval=
- h->handler::multi_range_read_init(seq_funcs, seq_init_param,
- n_ranges, mode, buf);
- DBUG_RETURN(retval);
+ index_strategy= ordered_idx_reader= &strategy_factory.ordered_index_reader;
}
- rowids_buf= buf->buffer;
+ else
+ index_strategy= &strategy_factory.simple_index_reader;
+
+ strategy= index_strategy;
+ /*
+ We don't need a rowid-to-rndpos step if
+ - We're doing a scan on clustered primary key
+ - [In the future] We're doing an index_only read
+ */
+ DBUG_ASSERT(h->inited == handler::INDEX ||
+ (h->inited == handler::RND && h2 &&
+ h2->inited == handler::INDEX));
- is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+ handler *h_idx= (h->inited == handler::INDEX)? h: h2;
+ keyno= h_idx->active_index;
+
+ Mrr_ordered_rndpos_reader *disk_strategy= NULL;
+ if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered()))
+ {
+ strategy= disk_strategy= &strategy_factory.ordered_rndpos_reader;
+ }
if (is_mrr_assoc)
- status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
-
- rowids_buf_end= buf->buffer_end;
- elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- rowids_buf_last= rowids_buf +
- ((rowids_buf_end - rowids_buf)/ elem_size)*
- elem_size;
- rowids_buf_end= rowids_buf_last;
+ status_var_increment(thd->status_var.ha_multi_range_read_init_count);
+ full_buf= buf->buffer;
+ full_buf_end= buf->buffer_end;
+
+ if (strategy == index_strategy)
+ {
+ ///if (ordered_idx_reader)
+ // ordered_idx_reader->auto_refill= TRUE;
+ /* Index strategy serves it all. We don't need two handlers, etc */
+ /* Give the buffer to index strategy */
+ if ((res= index_strategy->init(h, seq_funcs, seq_init_param, n_ranges,
+ mode, this)))
+ goto error;
+ }
+ else
+ {
/*
- There can be two cases:
- - This is the first call since index_init(), h2==NULL
- Need to setup h2 then.
- - This is not the first call, h2 is initalized and set up appropriately.
- The caller might have called h->index_init(), need to switch h to
- rnd_pos calls.
+ If we got here the request is served by both index and rndpos strategies
+ working together.
+
+ */
+ rowid_buffer.set_buffer_space(buf->buffer, buf->buffer_end);
+
+ if ((res= setup_two_handlers()))
+ DBUG_RETURN(res);
+
+ ///if (ordered_idx_reader)
+ /// ordered_idx_reader->auto_refill= FALSE;
+
+ if ((res= index_strategy->init(h2, seq_funcs, seq_init_param, n_ranges,
+ mode, this)) ||
+ (res= disk_strategy->init(h, index_strategy, mode, &rowid_buffer)))
+ {
+ goto error;
+ }
+ }
+
+ res= strategy->refill_buffer();
+ if (res && res != HA_ERR_END_OF_FILE) //psergey-todo: remove EOF check here
+ goto error;
+
+ /*
+ If we have scanned through all intervals in *seq, then adjust *buf to
+ indicate that the remaining buffer space will not be used.
*/
+// if (dsmrr_eof)
+// buf->end_of_used_area= rowid_buffer.end_of_space();
+
+
+ DBUG_RETURN(0);
+error:
+ close_second_handler();
+ strategy= NULL;
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Whatever the current state is, make it so that we have two handler objects:
+ - h (the primary) - initialized for rnd_pos() scan
+ - h2 (the secondary) - initialized for scanning the index specified in
+ this->keyno
+ RETURN
+ 0 OK
+ HA_XXX Error code
+*/
+
+int DsMrr_impl::setup_two_handlers()
+{
+ int res;
+ THD *thd= current_thd;
+ DBUG_ENTER("DsMrr_impl::setup_two_handlers");
if (!h2)
{
- /* Create a separate handler object to do rndpos() calls. */
- THD *thd= current_thd;
+ handler *new_h2;
+ Item *pushed_cond= NULL;
+ DBUG_ASSERT(h->inited == handler::INDEX);
+ /* Create a separate handler object to do rnd_pos() calls. */
/*
::clone() takes up a lot of stack, especially on 64 bit platforms.
The constant 5 is an empiric result.
*/
if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
DBUG_RETURN(1);
- DBUG_ASSERT(h->active_index != MAX_KEY);
- uint mrr_keyno= h->active_index;
- /* Create a separate handler object to do rndpos() calls. */
+ /* Create a separate handler object to do rnd_pos() calls. */
if (!(new_h2= h->clone(thd->mem_root)) ||
new_h2->ha_external_lock(thd, F_RDLCK))
{
@@ -363,88 +904,69 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
DBUG_RETURN(1);
}
- if (mrr_keyno == h->pushed_idx_cond_keyno)
+ if (keyno == h->pushed_idx_cond_keyno)
pushed_cond= h->pushed_idx_cond;
-
+
+ Mrr_reader *save_strategy= strategy;
+ strategy= NULL;
/*
Caution: this call will invoke this->dsmrr_close(). Do not put the
- created secondary table handler into this->h2 or it will delete it.
+ created secondary table handler new_h2 into this->h2 or it will delete
+ it. Also, save the picked strategy
*/
- if (h->ha_index_end())
- {
- h2=new_h2;
- goto error;
- }
+ res= h->ha_index_end();
+ strategy= save_strategy;
h2= new_h2; /* Ok, now can put it into h2 */
+
+ if (res || (res= (h->ha_rnd_init(FALSE))))
+ goto error;
+
table->prepare_for_position();
h2->extra(HA_EXTRA_KEYREAD);
-
- if (h2->ha_index_init(mrr_keyno, FALSE))
+ h2->mrr_iter= h->mrr_iter;
+
+ if ((res= h2->ha_index_init(keyno, FALSE)))
goto error;
- use_default_impl= FALSE;
if (pushed_cond)
- h2->idx_cond_push(mrr_keyno, pushed_cond);
+ h2->idx_cond_push(keyno, pushed_cond);
}
else
{
+ DBUG_ASSERT(h2 && h2->inited==handler::INDEX);
/*
We get here when the access alternates betwen MRR scan(s) and non-MRR
scans.
Calling h->index_end() will invoke dsmrr_close() for this object,
- which will delete h2. We need to keep it, so save put it away and dont
+ which will delete h2. We need to keep it, so put it away and dont
let it be deleted:
*/
- handler *save_h2= h2;
- h2= NULL;
- int res= (h->inited == handler::INDEX && h->ha_index_end());
- h2= save_h2;
- use_default_impl= FALSE;
- if (res)
+ if (h->inited == handler::INDEX)
+ {
+ handler *save_h2= h2;
+ Mrr_reader *save_strategy= strategy;
+ h2= NULL;
+ strategy= NULL;
+ res= h->ha_index_end();
+ h2= save_h2;
+ strategy= save_strategy;
+ if (res)
+ goto error;
+ }
+ if ((h->inited != handler::RND) && h->ha_rnd_init(FALSE))
goto error;
}
-
- if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
- mode, buf) ||
- dsmrr_fill_buffer())
- {
- goto error;
- }
- /*
- If the above call has scanned through all intervals in *seq, then
- adjust *buf to indicate that the remaining buffer space will not be used.
- */
- if (dsmrr_eof)
- buf->end_of_used_area= rowids_buf_last;
-
- /*
- h->inited == INDEX may occur when 'range checked for each record' is
- used.
- */
- if ((h->inited != handler::RND) &&
- ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) ||
- (h->ha_rnd_init(FALSE))))
- goto error;
-
- use_default_impl= FALSE;
- h->mrr_funcs= *seq_funcs;
-
DBUG_RETURN(0);
error:
- h2->ha_index_or_rnd_end();
- h2->ha_external_lock(current_thd, F_UNLCK);
- h2->close();
- delete h2;
- h2= NULL;
- DBUG_RETURN(1);
+ //close_second_handler(); -- caller does that
+ DBUG_RETURN(res);
}
-void DsMrr_impl::dsmrr_close()
+void DsMrr_impl::close_second_handler()
{
- DBUG_ENTER("DsMrr_impl::dsmrr_close");
if (h2)
{
h2->ha_index_or_rnd_end();
@@ -453,136 +975,284 @@ void DsMrr_impl::dsmrr_close()
delete h2;
h2= NULL;
}
- use_default_impl= TRUE;
- DBUG_VOID_RETURN;
}
-static int rowid_cmp(void *h, uchar *a, uchar *b)
+void DsMrr_impl::dsmrr_close()
{
- return ((handler*)h)->cmp_ref(a, b);
+ DBUG_ENTER("DsMrr_impl::dsmrr_close");
+ close_second_handler();
+ strategy= NULL;
+ DBUG_VOID_RETURN;
}
-/**
- DS-MRR: Fill the buffer with rowids and sort it by rowid
-
- {This is an internal function of DiskSweep MRR implementation}
- Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
- buffer. When the buffer is full or scan is completed, sort the buffer by
- rowid and return.
-
- The function assumes that rowids buffer is empty when it is invoked.
-
- @param h Table handler
-
- @retval 0 OK, the next portion of rowids is in the buffer,
- properly ordered
- @retval other Error
+/*
+ my_qsort2-compatible function to compare key tuples
*/
-int DsMrr_impl::dsmrr_fill_buffer()
+int Mrr_ordered_index_reader::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
{
- char *range_info;
+ Mrr_ordered_index_reader *this_= (Mrr_ordered_index_reader*)arg;
+ TABLE *table= this_->h->get_table();
int res;
- DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
-
- rowids_buf_cur= rowids_buf;
- while ((rowids_buf_cur < rowids_buf_end) &&
- !(res= h2->handler::multi_range_read_next(&range_info)))
+ KEY_PART_INFO *part= table->key_info[this_->h->active_index].key_part;
+
+ if (this_->keypar.use_key_pointers)
{
- KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
- if (h2->mrr_funcs.skip_index_tuple &&
- h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
- continue;
-
- /* Put rowid, or {rowid, range_id} pair into the buffer */
- h2->position(table->record[0]);
- memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
- rowids_buf_cur += h2->ref_length;
+ /* the buffer stores pointers to keys, get to the keys */
+ key1= *((uchar**)key1);
+ key2= *((uchar**)key2); // todo is this alignment-safe?
+ }
- if (is_mrr_assoc)
+ uchar *key1_end= key1 + this_->keypar.key_tuple_length;
+
+ while (key1 < key1_end)
+ {
+ Field* f = part->field;
+ int len = part->store_length;
+ if (part->null_bit)
{
- memcpy(rowids_buf_cur, &range_info, sizeof(void*));
- rowids_buf_cur += sizeof(void*);
+ if (*key1) // key1 == NULL
+ {
+ if (!*key2) // key1(NULL) < key2(notNULL)
+ return -1;
+ goto equals;
+ }
+ else if (*key2) // key1(notNULL) > key2 (NULL)
+ return 1;
+ // Step over NULL byte for f->cmp().
+ key1++;
+ key2++;
+ len--;
}
+
+ if ((res= f->key_cmp(key1, key2)))
+ return res;
+equals:
+ key1 += len;
+ key2 += len;
+ part++;
+ }
+ return 0;
+}
+
+
+int Mrr_ordered_index_reader::key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2)
+{
+ return -key_tuple_cmp(arg, key1, key2);
+}
+
+
+/**
+ Setup key/rowid buffer sizes based on sample_key and its length.
+
+ @param
+ sample_key A lookup key to use as a sample. It is assumed that
+ all other keys will have the same length/etc.
+ @note
+ This function must be called when all buffers are empty
+*/
+
+void DsMrr_impl::setup_buffer_sizes(uint key_size_in_keybuf,
+ key_part_map key_tuple_map)
+{
+ uint key_buff_elem_size= key_size_in_keybuf +
+ (int)is_mrr_assoc * sizeof(void*);
+
+ KEY *key_info= &h->get_table()->key_info[keyno];
+ if (strategy == index_strategy)
+ {
+ /* Give all space to the key buffer, key buffer must be forward */
+ key_buffer= &forward_key_buf;
+ key_buffer->set_buffer_space(full_buf, full_buf_end);
+
+ /* Just in case, tell rowid buffer that it has zero size: */
+ rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);
+ return;
+ }
+
+ /*
+ Ok if we got here we need to allocate one part of the buffer
+ for keys and another part for rowids.
+ */
+ uint rowid_buf_elem_size= h->ref_length +
+ (int)is_mrr_assoc * sizeof(char*);
+
+ /*
+ Use rec_per_key statistics as a basis to find out how many rowids
+ we'll get for each key value.
+ TODO: are we guaranteed to get r_p_c==1 for unique keys?
+ TODO: what should be the default value to use when there is no
+ statistics?
+ */
+ uint parts= my_count_bits(key_tuple_map);
+ ulong rpc;
+ if ((rpc= key_info->rec_per_key[parts - 1]))
+ {
+ rowid_buf_elem_size *= rpc;
}
- if (res && res != HA_ERR_END_OF_FILE)
- DBUG_RETURN(res);
- dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
+ double fraction_for_rowids=
+ ((double) rowid_buf_elem_size /
+ ((double)rowid_buf_elem_size + key_buff_elem_size));
- /* Sort the buffer contents by rowid */
- uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
- uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+ size_t bytes_for_rowids=
+ round(fraction_for_rowids * (full_buf_end - full_buf));
- my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
- (void*)h);
- rowids_buf_last= rowids_buf_cur;
- rowids_buf_cur= rowids_buf;
- DBUG_RETURN(0);
+ uint bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
+
+ if (bytes_for_keys < key_buff_elem_size + 1)
+ {
+ uint add= key_buff_elem_size + 1 - bytes_for_keys;
+ bytes_for_rowids -= add;
+ DBUG_ASSERT(bytes_for_rowids >=
+ (h->ref_length + (int)is_mrr_assoc * sizeof(char*) + 1));
+ }
+
+ rowid_buffer_end= full_buf + bytes_for_rowids;
+ rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+ key_buffer= &backward_key_buf;
+ key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
}
+void DsMrr_impl::reset_buffer_sizes()
+{
+ if (strategy != index_strategy)
+ {
+ /*
+ Ok we have both ordered index reader and there is a disk rearder.
+ Redistribute the buffer space.
+ */
+ rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+ key_buffer= &backward_key_buf;
+ key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
+ }
+}
+
/**
- DS-MRR implementation: multi_range_read_next() function
+ Take unused space from the key buffer and give it to the rowid buffer
*/
+//psergey-todo: do invoke this function.
+void DsMrr_impl::reallocate_buffer_space()
+{
+ uchar *unused_start, *unused_end;
+ key_buffer->remove_unused_space(&unused_start, &unused_end);
+ rowid_buffer.grow(unused_start, unused_end);
+}
-int DsMrr_impl::dsmrr_next(char **range_info)
+
+//////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
+
+bool Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
{
int res;
- uchar *cur_range_info= 0;
- uchar *rowid;
+ //h= h_arg;
+ //param= param_arg;
+ owner= owner_arg;
+
+ identical_key_it.init(owner->key_buffer);
+ /* Get the first pair into (cur_index_tuple, cur_range_info) */
+ owner->key_buffer->setup_reading(&cur_index_tuple, owner->keypar.key_size_in_keybuf,
+ owner->is_mrr_assoc? (uchar**)&owner->cur_range_info: NULL,
+ sizeof(void*));
+
+ if (identical_key_it.read())
+ return TRUE;
- if (use_default_impl)
- return h->handler::multi_range_read_next(range_info);
+ uchar *key_in_buf= cur_index_tuple;
+
+ last_identical_key_ptr= cur_index_tuple;
+ if (owner->keypar.use_key_pointers)
+ cur_index_tuple= *((uchar**)cur_index_tuple);
- do
+ /* Check out how many more identical keys are following */
+ uchar *save_cur_index_tuple= cur_index_tuple;
+ while (!identical_key_it.read())
{
- if (rowids_buf_cur == rowids_buf_last)
- {
- if (dsmrr_eof)
- {
- res= HA_ERR_END_OF_FILE;
- goto end;
- }
- res= dsmrr_fill_buffer();
- if (res)
- goto end;
- }
-
- /* return eof if there are no rowids in the buffer after re-fill attempt */
- if (rowids_buf_cur == rowids_buf_last)
+ if (Mrr_ordered_index_reader::key_tuple_cmp(owner, key_in_buf, cur_index_tuple))
+ break;
+ last_identical_key_ptr= cur_index_tuple;
+ }
+ identical_key_it.init(owner->key_buffer);
+ cur_index_tuple= save_cur_index_tuple;
+ res= owner->h->ha_index_read_map(owner->h->get_table()->record[0],
+ cur_index_tuple,
+ owner->keypar.key_tuple_map,
+ HA_READ_KEY_EXACT);
+
+ if (res)
+ {
+ close();
+ return res; /* Fatal error */
+ }
+ get_next_row= FALSE;
+ return 0;
+}
+
+
+int Key_value_records_iterator::get_next()
+{
+ int res;
+
+ if (get_next_row)
+ {
+ if (owner->keypar.index_ranges_unique)
+ return HA_ERR_END_OF_FILE; /* Max one match */
+
+ handler *h= owner->h;
+ if ((res= h->ha_index_next_same(h->get_table()->record[0],
+ cur_index_tuple,
+ owner->keypar.key_tuple_length)))
{
- res= HA_ERR_END_OF_FILE;
- goto end;
+ /* EOF is EOF for iterator, also, any error means EOF on the iterator */
+ return res;
}
- rowid= rowids_buf_cur;
+ identical_key_it.init(owner->key_buffer);
+ get_next_row= FALSE;
+ }
- if (is_mrr_assoc)
- memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
+ identical_key_it.read(); // This gets us next range_id.
+ if (!last_identical_key_ptr || (cur_index_tuple == last_identical_key_ptr))
+ {
+ get_next_row= TRUE;
+ }
+ return 0;
+}
- rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
- if (h2->mrr_funcs.skip_record &&
- h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
- continue;
- res= h->ha_rnd_pos(table->record[0], rowid);
- break;
- } while (true);
-
- if (is_mrr_assoc)
+void Key_value_records_iterator::close()
+{
+ while (!owner->key_buffer->read() &&
+ (cur_index_tuple != last_identical_key_ptr)) {}
+}
+
+
+/**
+ DS-MRR implementation: multi_range_read_next() function.
+
+ Calling convention is like multi_range_read_next() has.
+*/
+
+int DsMrr_impl::dsmrr_next(char **range_info)
+{
+ int res;
+ while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE)
{
- memcpy(range_info, rowid + h->ref_length, sizeof(void*));
+ if ((res= strategy->refill_buffer()))
+ break; /* EOF or error */
}
-end:
return res;
+ //return strategy->get_next(range_info);
}
/**
DS-MRR implementation: multi_range_read_info() function
*/
-ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+ uint key_parts,
uint *bufsz, uint *flags, COST_VECT *cost)
{
ha_rows res;
@@ -590,8 +1260,8 @@ ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
uint def_bufsz= *bufsz;
/* Get cost/flags/mem_usage of default MRR implementation */
- res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
- &def_flags, cost);
+ res= h->handler::multi_range_read_info(keyno, n_ranges, rows, key_parts,
+ &def_bufsz, &def_flags, cost);
DBUG_ASSERT(!res);
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
@@ -683,7 +1353,29 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
return FALSE;
}
-/**
+
+/*
+ Check if key/flags allow DS-MRR/CPK strategy to be used
+
+ @param thd
+ @param keyno Index that will be used
+ @param mrr_flags
+
+ @retval TRUE DS-MRR/CPK should be used
+ @retval FALSE Otherwise
+*/
+
+bool DsMrr_impl::check_cpk_scan(THD *thd, uint keyno, uint mrr_flags)
+{
+ return test((mrr_flags & HA_MRR_SINGLE_POINT) && // check
+ // !(mrr_flags & HA_MRR_SORTED) &&
+ keyno == table->s->primary_key &&
+ h->primary_key_is_clustered() &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)); //check
+}
+
+
+/*
DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
Make the choice between using Default MRR implementation and DS-MRR.
@@ -706,21 +1398,25 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
@retval FALSE DS-MRR implementation should be used
*/
+
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
uint *bufsz, COST_VECT *cost)
{
COST_VECT dsmrr_cost;
bool res;
THD *thd= current_thd;
+
+ bool doing_cpk_scan= check_cpk_scan(thd, keyno, *flags);
+ bool using_cpk= test(keyno == table->s->primary_key &&
+ h->primary_key_is_clustered());
if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
- (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
- key_uses_partial_cols(table, keyno))
+ (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(table, keyno))
{
/* Use the default implementation */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
return TRUE;
}
-
+
uint add_len= table->key_info[keyno].key_length + h->ref_length;
*bufsz -= add_len;
if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
@@ -744,6 +1440,10 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
*flags &= ~HA_MRR_SORTED; /* We will return unordered output */
*cost= dsmrr_cost;
res= FALSE;
+
+ if ((*flags & HA_MRR_SINGLE_POINT) &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+ *flags |= HA_MRR_MATERIALIZED_KEYS;
}
else
{
@@ -828,17 +1528,14 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
/*
Get cost of one sort-and-sweep step
+
+ It consists of two parts:
+ - sort an array of #nrows ROWIDs using qsort
+ - read #nrows records from table in a sweep.
- SYNOPSIS
- get_sort_and_sweep_cost()
- table Table being accessed
- nrows Number of rows to be sorted and retrieved
- cost OUT The cost
-
- DESCRIPTION
- Get cost of these operations:
- - sort an array of #nrows ROWIDs using qsort
- - read #nrows records from table in a sweep.
+ @param table Table being accessed
+ @param nrows Number of rows to be sorted and retrieved
+ @param cost OUT The cost of scan
*/
static