MWL#121-124 DS-MRR support for key-ordered retrieval, etc

- Merge into 5.3-main
author: Sergey Petrunya <psergey@askmonty.org> 2010-11-01 18:49:59 +0300
committer: Sergey Petrunya <psergey@askmonty.org> 2010-11-01 18:49:59 +0300
commit: 6c15806b68a668b7039d8e5b5f2c6623c50bff5f (patch)
tree: d3cc5d6d70ecf2e90336b414c931c403a3461249 /sql/multi_range_read.cc
parent: d48a8b60345c5b5f95c2ce590f7032a7c9f87c4b (diff)
parent: b76a8595c611bedf512b19a7c4ccc260f0d0a8f6 (diff)
download: mariadb-git-6c15806b68a668b7039d8e5b5f2c6623c50bff5f.tar.gz
1 files changed, 887 insertions, 190 deletions
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index 53699c47688..f577d4598e3 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1,4 +1,5 @@
 #include "mysql_priv.h"
+#include <my_bit.h>
 #include "sql_select.h"
 
 /****************************************************************************
@@ -136,10 +137,16 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
 */
 
 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
-                                       uint *bufsz, uint *flags, COST_VECT *cost)
+                                       uint key_parts, uint *bufsz, 
+                                       uint *flags, COST_VECT *cost)
 {
-  *bufsz= 0; /* Default implementation doesn't need a buffer */
+  /* 
+    Currently we expect this function to be called only in preparation of scan
+    with HA_MRR_SINGLE_POINT property.
+  */
+  DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
 
+  *bufsz= 0; /* Default implementation doesn't need a buffer */
   *flags |= HA_MRR_USE_DEFAULT_IMPL;
 
   cost->zero();
@@ -207,7 +214,6 @@ handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
   DBUG_RETURN(0);
 }
 
-
 /**
   Get next record in MRR scan
 
@@ -223,7 +229,7 @@ handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
 
 int handler::multi_range_read_next(char **range_info)
 {
-  int UNINIT_VAR(result);
+  int result= HA_ERR_END_OF_FILE;
   int range_res;
   DBUG_ENTER("handler::multi_range_read_next");
 
@@ -276,6 +282,452 @@ scan_it_again:
   DBUG_RETURN(result);
 }
 
+
+/***** MRR_impl classes ****************************************************/
+
+int Mrr_simple_index_reader::get_next(char **range_info)
+{
+  while (!(res= h->handler::multi_range_read_next(range_info)))
+  {
+    KEY_MULTI_RANGE *curr_range= &h->handler::mrr_cur_range;
+    if (!h->mrr_funcs.skip_index_tuple ||
+        !h->mrr_funcs.skip_index_tuple(h->mrr_iter, curr_range->ptr))
+      break;
+  }
+  return res;
+}
+
+int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
+                                  void *seq_init_param, uint n_ranges,
+                                  uint mode, Buffer_manager *buf_manager_arg)
+{
+  HANDLER_BUFFER no_buffer = {NULL, NULL, NULL};
+  h= h_arg;
+  res= 0;
+  return h->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
+                                           mode, &no_buffer);
+}
+
+/**
+  DS-MRR/CPK: multi_range_read_next() function
+  
+  @param range_info  OUT  identifier of range that the returned record belongs to
+  
+  @note
+    This function walks over key buffer and does index reads, i.e. it produces
+    {current_record, range_id} pairs.
+
+    The function has the same call contract like multi_range_read_next()'s.
+
+    We actually iterate over nested sequences:
+    - a disjoint sequence of index ranges
+      - each range has multiple records
+        - each record goes into multiple identical ranges.
+
+  @retval 0                   OK, next record was successfully read
+  @retval HA_ERR_END_OF_FILE  End of records
+  @retval Other               Some other error
+*/
+
+int Mrr_ordered_index_reader::get_next(char **range_info_arg)
+{
+  DBUG_ENTER("Mrr_ordered_index_reader::get_next");
+  
+  if (!know_key_tuple_params) /* We're in startup phase */
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+  while (1)
+  {
+    bool have_record= FALSE;
+    if (scanning_key_val_iter)
+    {
+      if (kv_it.get_next())
+      {
+        kv_it.close();
+        scanning_key_val_iter= FALSE;
+      }
+      else
+        have_record= TRUE;
+    }
+    else
+    {
+      while (kv_it.init(this))
+      {
+        if (key_buffer->is_empty())
+        {
+          /*if (auto_refill)
+          {
+            int res;
+            if ((res= refill_buffer()))
+              DBUG_RETURN(res);
+            if (key_buffer->is_empty())
+            {
+              index_scan_eof= TRUE;
+              DBUG_RETURN(HA_ERR_END_OF_FILE);
+            }
+          }
+          else
+          */
+          {
+            /* Buffer refills are managed by somebody else for us */
+            index_scan_eof= TRUE;
+            DBUG_RETURN(HA_ERR_END_OF_FILE);
+          }
+        }
+      }
+      scanning_key_val_iter= TRUE;
+    }
+
+    if (have_record &&
+        (!mrr_funcs.skip_index_tuple ||
+         !mrr_funcs.skip_index_tuple(mrr_iter, *(char**)cur_range_info))
+        && 
+        (!mrr_funcs.skip_record ||
+         !mrr_funcs.skip_record(mrr_iter, *(char**)cur_range_info, NULL)))
+    {
+      break;
+    }
+    /* Go get another (record, range_id) combination */
+  } /* while */
+
+  memcpy(range_info_arg, cur_range_info, sizeof(void*));
+  DBUG_RETURN(0);
+}
+
+
+/**
+  DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
+  
+  Enumerate the input range (=key) sequence, fill the key buffer with 
+  (lookup_key, range_id) pairs and sort it.
+
+  When this function returns, either
+   - key buffer is non-empty, or
+   - key buffer is empty and source range sequence is exhausted
+  
+  @note
+    dsmrr_eof is set to indicate whether we've exhausted the list of ranges 
+    we're scanning.
+*/
+
+int Mrr_ordered_index_reader::refill_buffer()
+{
+  int res;
+  KEY_MULTI_RANGE cur_range;
+  uchar **range_info_ptr= (uchar**)&cur_range.ptr;
+  uchar *key_ptr;
+  DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");
+
+  DBUG_ASSERT(!know_key_tuple_params || key_buffer->is_empty());
+  if (know_key_tuple_params)
+  {
+    buf_manager->reset_buffer_sizes();
+    key_buffer->reset();
+    key_buffer->setup_writing(&key_ptr, keypar.key_size_in_keybuf,
+                              is_mrr_assoc? (uchar**)&range_info_ptr : NULL,
+                              sizeof(uchar*));
+  }
+#if 0
+
+  if (know_key_tuple_params)
+  {
+    if (do_rndpos_scan && rowid_buffer.is_empty())
+    {
+      /*
+        We're using two buffers and both of them are empty now. Restore the
+        original sizes
+      */
+      rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+      key_buffer= &backward_key_buf;
+      key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
+    }
+  }
+  is all of the ifdef-ed stuff is handled above?
+#endif
+  while ((!know_key_tuple_params || key_buffer->can_write()) && 
+         !(res= mrr_funcs.next(mrr_iter, &cur_range)))
+  {
+    DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
+
+    if (!know_key_tuple_params)
+    {
+      /* This only happens when we've just started filling the buffer */
+      key_range *sample_key= &cur_range.start_key;
+      know_key_tuple_params= TRUE;
+      keypar.key_tuple_length= sample_key->length;
+      keypar.key_tuple_map= sample_key->keypart_map;
+      keypar.key_size_in_keybuf= keypar.use_key_pointers ? sizeof(char*) : keypar.key_tuple_length;
+      KEY *key_info= &h->get_table()->key_info[h->active_index];
+      keypar.index_ranges_unique= test(key_info->flags & HA_NOSAME && 
+                                       key_info->key_parts == 
+                                       my_count_bits(sample_key->keypart_map));
+      buf_manager->setup_buffer_sizes(keypar.key_size_in_keybuf, keypar.key_tuple_map);
+      key_buffer= buf_manager->get_key_buffer();
+      key_buffer->setup_writing(&key_ptr, keypar.key_size_in_keybuf,
+                               is_mrr_assoc? (uchar**)&range_info_ptr : NULL,
+                               sizeof(uchar*));
+      DBUG_ASSERT(key_buffer->can_write());
+    }
+    
+    /* Put key, or {key, range_id} pair into the buffer */
+    if (keypar.use_key_pointers)
+      key_ptr=(uchar*) &cur_range.start_key.key;
+    else
+      key_ptr=(uchar*) cur_range.start_key.key;
+
+    key_buffer->write();
+  }
+
+  no_more_keys= test(res);
+
+  key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)? 
+                     (qsort2_cmp)Mrr_ordered_index_reader::key_tuple_cmp_reverse : 
+                     (qsort2_cmp)Mrr_ordered_index_reader::key_tuple_cmp, 
+                   (void*)this);
+  
+  scanning_key_val_iter= FALSE;
+  index_scan_eof= FALSE; 
+
+  DBUG_RETURN((no_more_keys && key_buffer->is_empty())? HA_ERR_END_OF_FILE:0);
+}
+
+
+int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
+                                   void *seq_init_param, uint n_ranges,
+                                   uint mode, Buffer_manager *buf_manager_arg)
+{
+  h= h_arg;
+  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
+  keypar.use_key_pointers= test(mode & HA_MRR_MATERIALIZED_KEYS);
+  is_mrr_assoc=    !test(mode & HA_MRR_NO_ASSOCIATION);
+  mrr_funcs= *seq_funcs;
+  know_key_tuple_params= FALSE;
+  buf_manager= buf_manager_arg;
+  return 0;
+}
+
+
+static int rowid_cmp_reverse(void *h, uchar *a, uchar *b)
+{
+  return - ((handler*)h)->cmp_ref(a, b);
+}
+
+
+int Mrr_ordered_rndpos_reader::init(handler *h_arg, 
+                                    Mrr_index_reader *index_reader_arg,
+                                    uint mode,
+                                    Lifo_buffer *buf)
+{
+  h= h_arg;
+  index_reader= index_reader_arg;
+  rowid_buffer= buf;
+  is_mrr_assoc=    !test(mode & HA_MRR_NO_ASSOCIATION);
+  //rowid_buff_elem_size= h->ref_length;
+  //if (!(mode & HA_MRR_NO_ASSOCIATION))
+  //  rowid_buff_elem_size += sizeof(char*);
+
+  index_reader_exhausted= FALSE;
+  ///int res= index_reader->refill_buffer();
+  ///if (res && res!=HA_ERR_END_OF_FILE)
+  ///  return res;
+  return 0;
+}
+
+
+/**
+  DS-MRR: Fill and sort the rowid buffer
+
+  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
+  buffer. When the buffer is full or scan is completed, sort the buffer by 
+  rowid and return.
+
+  When this function returns, either rowid buffer is not empty, or the source
+  of lookup keys (i.e. ranges) is exhaused.
+  
+  dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
+  scanning. This function never returns HA_ERR_END_OF_FILE.
+
+  @retval 0      OK, the next portion of rowids is in the buffer,
+                 properly ordered
+  @retval other  Error
+*/
+
+
+int Mrr_ordered_rndpos_reader::refill_buffer()
+{
+  int res;
+  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");
+
+  if (index_reader_exhausted)
+    DBUG_RETURN(HA_ERR_END_OF_FILE);
+
+  while ((res= refill2() == HA_ERR_END_OF_FILE))
+  {
+    if ((res= index_reader->refill_buffer()))
+    {
+      if (res == HA_ERR_END_OF_FILE)
+        index_reader_exhausted= TRUE;
+      break;
+    }
+  }
+  DBUG_RETURN(res);
+}
+
+
+/* This one refills without calling index_reader->refill_buffer(). */
+int Mrr_ordered_rndpos_reader::refill2()
+{
+  char *range_info;
+  uchar **range_info_ptr= (uchar**)&range_info;
+  int res;
+  DBUG_ENTER("Mrr_ordered_rndpos_reader::refill2");
+
+  DBUG_ASSERT(rowid_buffer->is_empty());
+  index_rowid= index_reader->get_rowid_ptr();
+  rowid_buffer->reset();
+  rowid_buffer->setup_writing(&index_rowid, h->ref_length,
+                              is_mrr_assoc? (uchar**)&range_info_ptr: NULL,
+                              sizeof(void*));
+
+  last_identical_rowid= NULL;
+
+  while (rowid_buffer->can_write())
+  {
+    res= index_reader->get_next(&range_info);
+
+    if (res)
+      break;
+
+    /* Put rowid, or {rowid, range_id} pair into the buffer */
+    index_reader->h->position(index_reader->h->get_table()->record[0]);
+
+    rowid_buffer->write();
+  }
+   
+  /* Sort the buffer contents by rowid */
+  rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)h);
+
+  rowid_buffer->setup_reading(&rowid, h->ref_length,
+                              is_mrr_assoc? (uchar**)&rowids_range_id: NULL,
+                              sizeof(void*));
+  DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_next() function.
+
+  Calling convention is like multi_range_read_next() has.
+*/
+
+int Mrr_ordered_rndpos_reader::get_next(char **range_info)
+{
+  int res;
+
+  while (last_identical_rowid)
+  {
+    /*
+      Current record (the one we've returned in previous call) was obtained
+      from a rowid that matched multiple range_ids. Return this record again,
+      with next matching range_id.
+    */
+    bool UNINIT_VAR(bres);
+    bres= rowid_buffer->read();
+    DBUG_ASSERT(!bres);
+
+    if (is_mrr_assoc)
+      memcpy(range_info, rowids_range_id, sizeof(uchar*));
+
+    if (rowid == last_identical_rowid)
+    {
+      last_identical_rowid= NULL; /* reached the last of identical rowids */
+    }
+
+    if (!index_reader->skip_record((char*)*range_info, rowid))
+    {
+      return 0;
+    }
+  }
+
+  while (1)
+  {
+#if 0      
+    if (rowid_buffer->is_empty()) /* We're out of rowids */
+    {
+      /* First, finish off the sorted keys we have */ 
+      if (!index_reader->eof())
+      {
+        res= refill_buffer();
+        if (res && res != HA_ERR_END_OF_FILE)
+          return res;
+      }
+
+      if (rowid_buffer->is_empty())
+      {
+        /*
+          Ok neither index_reader nor us have any records. Refill index
+          reader, then refill us.
+        */
+        // TODO: if key buffer is empty, too, redistribute the buffer space.
+        if ((res= index_reader->refill_buffer()) ||
+            (res= refill_buffer()))
+          return res;
+      }
+    }
+#endif
+   
+    last_identical_rowid= NULL;
+
+    /* Return eof if there are no rowids in the buffer after re-fill attempt */
+    if (rowid_buffer->read())
+      return HA_ERR_END_OF_FILE;
+
+    if (is_mrr_assoc)
+    {
+      memcpy(range_info, rowids_range_id, sizeof(uchar*));
+    }
+
+    if (index_reader->skip_record(*range_info, rowid))
+      continue;
+
+    res= h->ha_rnd_pos(h->get_table()->record[0], rowid);
+
+    if (res == HA_ERR_RECORD_DELETED)
+      continue;
+    
+    /* 
+      Check if subsequent buffer elements have the same rowid value as this
+      one. If yes, remember this fact so that we don't make any more rnd_pos()
+      calls with this value.
+    */
+    if (!res)
+    {
+      uchar *cur_rowid= rowid;
+      /* 
+        Note: this implies that SQL layer doesn't touch table->record[0]
+        between calls.
+      */
+      Lifo_buffer_iterator it;
+      it.init(rowid_buffer);
+      while (!it.read()) // reads to (rowid, ...)
+      {
+        if (h->cmp_ref(rowid, cur_rowid))
+          break;
+        last_identical_rowid= rowid;
+      }
+    }
+    return 0;
+  }
+
+  return res;
+}
+
+
+
+
+/************ MRR_impl classes end *********************************************/
+
+
 /****************************************************************************
  * DS-MRR implementation 
  ***************************************************************************/
@@ -302,9 +754,8 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
                            void *seq_init_param, uint n_ranges, uint mode,
                            HANDLER_BUFFER *buf)
 {
-  uint elem_size;
-  Item *pushed_cond= NULL;
-  handler *new_h2= 0;
+  THD *thd= current_thd;
+  int res;
   DBUG_ENTER("DsMrr_impl::dsmrr_init");
 
   /*
@@ -312,50 +763,140 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
     has not been called, so set the owner handler here as well.
   */
   h= h_arg;
-  if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
+  is_mrr_assoc=    !test(mode & HA_MRR_NO_ASSOCIATION);
+
+  if ((mode & HA_MRR_USE_DEFAULT_IMPL) || (mode & HA_MRR_SORTED))
+  {
+    DBUG_ASSERT(h->inited == handler::INDEX);
+    Mrr_simple_index_reader *s= &strategy_factory.simple_index_reader;
+    res= s->init(h, seq_funcs, seq_init_param, n_ranges, mode, this);
+    strategy= s;
+    DBUG_RETURN(res);
+  }
+  
+  /* Neither of strategies used below can handle sorting */
+  DBUG_ASSERT(!(mode & HA_MRR_SORTED));
+
+  /*
+    Determine whether we'll need to do key sorting and/or rnd_pos() scan
+  */
+  index_strategy= NULL;
+  Mrr_ordered_index_reader *ordered_idx_reader= NULL;
+  if ((mode & HA_MRR_SINGLE_POINT) &&
+      optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
   {
-    use_default_impl= TRUE;
-    const int retval=
-      h->handler::multi_range_read_init(seq_funcs, seq_init_param,
-                                        n_ranges, mode, buf);
-    DBUG_RETURN(retval);
+    index_strategy= ordered_idx_reader= &strategy_factory.ordered_index_reader;
   }
-  rowids_buf= buf->buffer;
+  else
+    index_strategy= &strategy_factory.simple_index_reader;
+
+  strategy= index_strategy;
+  /*
+    We don't need a rowid-to-rndpos step if
+     - We're doing a scan on clustered primary key
+     - [In the future] We're doing an index_only read
+  */
+  DBUG_ASSERT(h->inited == handler::INDEX || 
+              (h->inited == handler::RND && h2 && 
+               h2->inited == handler::INDEX));
 
-  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
+  handler *h_idx= (h->inited == handler::INDEX)? h: h2;
+  keyno= h_idx->active_index;
+
+  Mrr_ordered_rndpos_reader *disk_strategy= NULL;
+  if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered()))
+  {
+    strategy= disk_strategy= &strategy_factory.ordered_rndpos_reader;
+  }
 
   if (is_mrr_assoc)
-    status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
- 
-  rowids_buf_end= buf->buffer_end;
-  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
-  rowids_buf_last= rowids_buf + 
-                      ((rowids_buf_end - rowids_buf)/ elem_size)*
-                      elem_size;
-  rowids_buf_end= rowids_buf_last;
+    status_var_increment(thd->status_var.ha_multi_range_read_init_count);
 
+  full_buf= buf->buffer;
+  full_buf_end= buf->buffer_end;
+  
+  if (strategy == index_strategy)
+  {
+    ///if (ordered_idx_reader)
+    //  ordered_idx_reader->auto_refill= TRUE;
+    /* Index strategy serves it all. We don't need two handlers, etc */
+    /* Give the buffer to index strategy */
+    if ((res= index_strategy->init(h, seq_funcs, seq_init_param, n_ranges,
+                                   mode, this)))
+      goto error;
+  }
+  else
+  {
     /*
-    There can be two cases:
-    - This is the first call since index_init(), h2==NULL
-       Need to setup h2 then.
-    - This is not the first call, h2 is initalized and set up appropriately.
-       The caller might have called h->index_init(), need to switch h to
-       rnd_pos calls.
+      If we got here the request is served by both index and rndpos strategies
+      working together.
+
+    */
+    rowid_buffer.set_buffer_space(buf->buffer, buf->buffer_end);
+
+    if ((res= setup_two_handlers()))
+      DBUG_RETURN(res);
+
+    ///if (ordered_idx_reader)
+    ///  ordered_idx_reader->auto_refill= FALSE;
+
+    if ((res= index_strategy->init(h2, seq_funcs, seq_init_param, n_ranges, 
+                                   mode, this)) || 
+        (res= disk_strategy->init(h, index_strategy, mode, &rowid_buffer)))
+    {
+      goto error;
+    }
+  }
+  
+  res= strategy->refill_buffer();
+  if (res && res != HA_ERR_END_OF_FILE) //psergey-todo: remove EOF check here
+    goto error;
+
+  /*
+    If we have scanned through all intervals in *seq, then adjust *buf to 
+    indicate that the remaining buffer space will not be used.
   */
+//  if (dsmrr_eof) 
+//    buf->end_of_used_area= rowid_buffer.end_of_space();
+
+  
+  DBUG_RETURN(0);
+error:
+  close_second_handler();
+  strategy= NULL;
+  DBUG_RETURN(1);
+}
+
+
+/*
+  Whatever the current state is, make it so that we have two handler objects:
+  - h (the primary)    -  initialized for rnd_pos() scan
+  - h2 (the secondary) -  initialized for scanning the index specified in
+                          this->keyno
+  RETURN 
+    0        OK
+    HA_XXX   Error code
+*/
+
+int DsMrr_impl::setup_two_handlers()
+{
+  int res;
+  THD *thd= current_thd;
+  DBUG_ENTER("DsMrr_impl::setup_two_handlers");
   if (!h2)
   {
-    /* Create a separate handler object to do rndpos() calls. */
-    THD *thd= current_thd;
+    handler *new_h2;
+    Item *pushed_cond= NULL;
+    DBUG_ASSERT(h->inited == handler::INDEX);
+    /* Create a separate handler object to do rnd_pos() calls. */
     /*
       ::clone() takes up a lot of stack, especially on 64 bit platforms.
       The constant 5 is an empiric result.
     */
     if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
       DBUG_RETURN(1);
-    DBUG_ASSERT(h->active_index != MAX_KEY);
-    uint mrr_keyno= h->active_index;
 
-    /* Create a separate handler object to do rndpos() calls. */
+    /* Create a separate handler object to do rnd_pos() calls. */
     if (!(new_h2= h->clone(thd->mem_root)) || 
         new_h2->ha_external_lock(thd, F_RDLCK))
     {
@@ -363,88 +904,69 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
       DBUG_RETURN(1);
     }
 
-    if (mrr_keyno == h->pushed_idx_cond_keyno)
+    if (keyno == h->pushed_idx_cond_keyno)
       pushed_cond= h->pushed_idx_cond;
-
+    
+    Mrr_reader *save_strategy= strategy;
+    strategy= NULL;
     /*
       Caution: this call will invoke this->dsmrr_close(). Do not put the
-      created secondary table handler into this->h2 or it will delete it.
+      created secondary table handler new_h2 into this->h2 or it will delete 
+      it. Also, save the picked strategy
     */
-    if (h->ha_index_end())
-    {
-      h2=new_h2;
-      goto error;
-    }
+    res= h->ha_index_end();
 
+    strategy= save_strategy;
     h2= new_h2; /* Ok, now can put it into h2 */
+
+    if (res || (res= (h->ha_rnd_init(FALSE))))
+      goto error;
+
     table->prepare_for_position();
     h2->extra(HA_EXTRA_KEYREAD);
-  
-    if (h2->ha_index_init(mrr_keyno, FALSE))
+    h2->mrr_iter= h->mrr_iter;
+
+    if ((res= h2->ha_index_init(keyno, FALSE)))
       goto error;
 
-    use_default_impl= FALSE;
     if (pushed_cond)
-      h2->idx_cond_push(mrr_keyno, pushed_cond);
+      h2->idx_cond_push(keyno, pushed_cond);
   }
   else
   {
+    DBUG_ASSERT(h2 && h2->inited==handler::INDEX);
     /* 
       We get here when the access alternates betwen MRR scan(s) and non-MRR
       scans.
 
       Calling h->index_end() will invoke dsmrr_close() for this object,
-      which will delete h2. We need to keep it, so save put it away and dont
+      which will delete h2. We need to keep it, so put it away and dont
       let it be deleted:
     */
-    handler *save_h2= h2;
-    h2= NULL;
-    int res= (h->inited == handler::INDEX && h->ha_index_end());
-    h2= save_h2;
-    use_default_impl= FALSE;
-    if (res)
+    if (h->inited == handler::INDEX)
+    {
+      handler *save_h2= h2;
+      Mrr_reader *save_strategy= strategy;
+      h2= NULL;
+      strategy= NULL;
+      res= h->ha_index_end();
+      h2= save_h2;
+      strategy= save_strategy;
+      if (res)
+        goto error;
+    }
+    if ((h->inited != handler::RND) && h->ha_rnd_init(FALSE))
       goto error;
   }
-
-  if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
-                                          mode, buf) || 
-      dsmrr_fill_buffer())
-  {
-    goto error;
-  }
-  /*
-    If the above call has scanned through all intervals in *seq, then
-    adjust *buf to indicate that the remaining buffer space will not be used.
-  */
-  if (dsmrr_eof) 
-    buf->end_of_used_area= rowids_buf_last;
-
-  /*
-     h->inited == INDEX may occur when 'range checked for each record' is
-     used.
-  */
-  if ((h->inited != handler::RND) && 
-      ((h->inited==handler::INDEX? h->ha_index_end(): FALSE) || 
-       (h->ha_rnd_init(FALSE))))
-      goto error;
-
-  use_default_impl= FALSE;
-  h->mrr_funcs= *seq_funcs;
-  
   DBUG_RETURN(0);
 error:
-  h2->ha_index_or_rnd_end();
-  h2->ha_external_lock(current_thd, F_UNLCK);
-  h2->close();
-  delete h2;
-  h2= NULL;
-  DBUG_RETURN(1);
+  //close_second_handler(); -- caller does that
+  DBUG_RETURN(res);
 }
 
 
-void DsMrr_impl::dsmrr_close()
+void DsMrr_impl::close_second_handler()
 {
-  DBUG_ENTER("DsMrr_impl::dsmrr_close");
   if (h2)
   {
     h2->ha_index_or_rnd_end();
@@ -453,136 +975,284 @@ void DsMrr_impl::dsmrr_close()
     delete h2;
     h2= NULL;
   }
-  use_default_impl= TRUE;
-  DBUG_VOID_RETURN;
 }
 
 
-static int rowid_cmp(void *h, uchar *a, uchar *b)
+void DsMrr_impl::dsmrr_close()
 {
-  return ((handler*)h)->cmp_ref(a, b);
+  DBUG_ENTER("DsMrr_impl::dsmrr_close");
+  close_second_handler();
+  strategy= NULL;
+  DBUG_VOID_RETURN;
 }
 
 
-/**
-  DS-MRR: Fill the buffer with rowids and sort it by rowid
-
-  {This is an internal function of DiskSweep MRR implementation}
-  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
-  buffer. When the buffer is full or scan is completed, sort the buffer by 
-  rowid and return.
-  
-  The function assumes that rowids buffer is empty when it is invoked. 
-  
-  @param h  Table handler
-
-  @retval 0      OK, the next portion of rowids is in the buffer,
-                 properly ordered
-  @retval other  Error
+/* 
+  my_qsort2-compatible function to compare key tuples 
 */
 
-int DsMrr_impl::dsmrr_fill_buffer()
+int Mrr_ordered_index_reader::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
 {
-  char *range_info;
+  Mrr_ordered_index_reader *this_= (Mrr_ordered_index_reader*)arg;
+  TABLE *table= this_->h->get_table();
   int res;
-  DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
-
-  rowids_buf_cur= rowids_buf;
-  while ((rowids_buf_cur < rowids_buf_end) && 
-         !(res= h2->handler::multi_range_read_next(&range_info)))
+  KEY_PART_INFO *part= table->key_info[this_->h->active_index].key_part;
+  
+  if (this_->keypar.use_key_pointers)
   {
-    KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
-    if (h2->mrr_funcs.skip_index_tuple &&
-        h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr))
-      continue;
-    
-    /* Put rowid, or {rowid, range_id} pair into the buffer */
-    h2->position(table->record[0]);
-    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
-    rowids_buf_cur += h2->ref_length;
+    /* the buffer stores pointers to keys, get to the keys */
+    key1= *((uchar**)key1);
+    key2= *((uchar**)key2);  // todo is this alignment-safe?
+  }
 
-    if (is_mrr_assoc)
+  uchar *key1_end= key1 + this_->keypar.key_tuple_length;
+
+  while (key1 < key1_end)
+  {
+    Field* f = part->field;
+    int len = part->store_length;
+    if (part->null_bit)
     {
-      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
-      rowids_buf_cur += sizeof(void*);
+      if (*key1) // key1 == NULL
+      {
+        if (!*key2) // key1(NULL) < key2(notNULL)
+          return -1;
+        goto equals;
+      }
+      else if (*key2) // key1(notNULL) > key2 (NULL)
+        return 1;
+      // Step over NULL byte for f->cmp().
+      key1++;
+      key2++;
+      len--;
     }
+    
+    if ((res= f->key_cmp(key1, key2)))
+      return res;
+equals:
+    key1 += len;
+    key2 += len;
+    part++;
+  }
+  return 0;
+}
+
+
+int Mrr_ordered_index_reader::key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2)
+{
+  return -key_tuple_cmp(arg, key1, key2);
+}
+
+
+/**
+  Setup key/rowid buffer sizes based on sample_key and its length.
+  
+  @param
+    sample_key  A lookup key to use as a sample. It is assumed that
+                all other keys will have the same length/etc.
+  @note
+    This function must be called when all buffers are empty
+*/
+
+void DsMrr_impl::setup_buffer_sizes(uint key_size_in_keybuf, 
+                                    key_part_map key_tuple_map)
+{
+  uint key_buff_elem_size= key_size_in_keybuf + 
+                           (int)is_mrr_assoc * sizeof(void*);
+  
+  KEY *key_info= &h->get_table()->key_info[keyno];
+  if (strategy == index_strategy)
+  {
+    /* Give all space to the key buffer, key buffer must be forward */
+    key_buffer= &forward_key_buf;
+    key_buffer->set_buffer_space(full_buf, full_buf_end);
+
+    /* Just in case, tell rowid buffer that it has zero size: */
+    rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);
+    return;
+  }
+  
+  /* 
+    Ok if we got here we need to allocate one part of the buffer 
+    for keys and another part for rowids.
+  */
+  uint rowid_buf_elem_size= h->ref_length + 
+                            (int)is_mrr_assoc * sizeof(char*);
+  
+  /*
+    Use rec_per_key statistics as a basis to find out how many rowids 
+    we'll get for each key value.
+     TODO: are we guaranteed to get r_p_c==1 for unique keys?
+     TODO: what should be the default value to use when there is no 
+           statistics?
+  */
+  uint parts= my_count_bits(key_tuple_map);
+  ulong rpc;
+  if ((rpc= key_info->rec_per_key[parts - 1]))
+  {
+    rowid_buf_elem_size *= rpc;
   }
 
-  if (res && res != HA_ERR_END_OF_FILE)
-    DBUG_RETURN(res); 
-  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
+  double fraction_for_rowids=
+    ((double) rowid_buf_elem_size / 
+         ((double)rowid_buf_elem_size + key_buff_elem_size));
 
-  /* Sort the buffer contents by rowid */
-  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
-  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
+  size_t bytes_for_rowids= 
+    round(fraction_for_rowids * (full_buf_end - full_buf));
   
-  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
-            (void*)h);
-  rowids_buf_last= rowids_buf_cur;
-  rowids_buf_cur=  rowids_buf;
-  DBUG_RETURN(0);
+  uint bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
+
+  if (bytes_for_keys < key_buff_elem_size + 1)
+  {
+    uint add= key_buff_elem_size + 1 - bytes_for_keys;
+    bytes_for_rowids -= add;
+    DBUG_ASSERT(bytes_for_rowids >= 
+                (h->ref_length + (int)is_mrr_assoc * sizeof(char*) + 1));
+  }
+
+  rowid_buffer_end= full_buf + bytes_for_rowids;
+  rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+  key_buffer= &backward_key_buf;
+  key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end); 
 }
 
 
+void DsMrr_impl::reset_buffer_sizes()
+{
+  if (strategy != index_strategy)
+  {
+    /*
+      Ok we have both ordered index reader and there is a disk rearder. 
+      Redistribute the buffer space.
+    */
+    rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
+    key_buffer= &backward_key_buf;
+    key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
+  }
+}
+
 /**
-  DS-MRR implementation: multi_range_read_next() function
+  Take unused space from the key buffer and give it to the rowid buffer
 */
+//psergey-todo: do invoke this function.
+void DsMrr_impl::reallocate_buffer_space()
+{
+  uchar *unused_start, *unused_end;
+  key_buffer->remove_unused_space(&unused_start, &unused_end);
+  rowid_buffer.grow(unused_start, unused_end);
+}
 
-int DsMrr_impl::dsmrr_next(char **range_info)
+
+//////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
+
+bool Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
 {
   int res;
-  uchar *cur_range_info= 0;
-  uchar *rowid;
+  //h= h_arg;
+  //param= param_arg;
+  owner= owner_arg;
+
+  identical_key_it.init(owner->key_buffer);
+  /* Get the first pair into (cur_index_tuple, cur_range_info) */ 
+  owner->key_buffer->setup_reading(&cur_index_tuple, owner->keypar.key_size_in_keybuf,
+                            owner->is_mrr_assoc? (uchar**)&owner->cur_range_info: NULL,
+                            sizeof(void*));
+
+  if (identical_key_it.read())
+    return TRUE;
 
-  if (use_default_impl)
-    return h->handler::multi_range_read_next(range_info);
+  uchar *key_in_buf= cur_index_tuple;
+
+  last_identical_key_ptr= cur_index_tuple;
+  if (owner->keypar.use_key_pointers)
+    cur_index_tuple= *((uchar**)cur_index_tuple);
   
-  do
+  /* Check out how many more identical keys are following */
+  uchar *save_cur_index_tuple= cur_index_tuple;
+  while (!identical_key_it.read())
   {
-    if (rowids_buf_cur == rowids_buf_last)
-    {
-      if (dsmrr_eof)
-      {
-        res= HA_ERR_END_OF_FILE;
-        goto end;
-      }
-      res= dsmrr_fill_buffer();
-      if (res)
-        goto end;
-    }
-   
-    /* return eof if there are no rowids in the buffer after re-fill attempt */
-    if (rowids_buf_cur == rowids_buf_last)
+    if (Mrr_ordered_index_reader::key_tuple_cmp(owner, key_in_buf, cur_index_tuple))
+      break;
+    last_identical_key_ptr= cur_index_tuple;
+  }
+  identical_key_it.init(owner->key_buffer);
+  cur_index_tuple= save_cur_index_tuple;
+  res= owner->h->ha_index_read_map(owner->h->get_table()->record[0], 
+                            cur_index_tuple, 
+                            owner->keypar.key_tuple_map, 
+                            HA_READ_KEY_EXACT);
+
+  if (res)
+  {
+    close();
+    return res; /* Fatal error */
+  }
+  get_next_row= FALSE;
+  return 0;
+}
+
+
+int Key_value_records_iterator::get_next()
+{
+  int res;
+
+  if (get_next_row)
+  {
+    if (owner->keypar.index_ranges_unique)
+      return HA_ERR_END_OF_FILE;  /* Max one match */
+    
+    handler *h= owner->h;
+    if ((res= h->ha_index_next_same(h->get_table()->record[0], 
+                                    cur_index_tuple, 
+                                    owner->keypar.key_tuple_length)))
     {
-      res= HA_ERR_END_OF_FILE;
-      goto end;
+      /* EOF is EOF for iterator, also, any error means EOF on the iterator */
+      return res;
     }
-    rowid= rowids_buf_cur;
+    identical_key_it.init(owner->key_buffer);
+    get_next_row= FALSE;
+  }
 
-    if (is_mrr_assoc)
-      memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**));
+  identical_key_it.read(); // This gets us next range_id.
+  if (!last_identical_key_ptr || (cur_index_tuple == last_identical_key_ptr))
+  {
+    get_next_row= TRUE;
+  }
+  return 0;
+}
 
-    rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
-    if (h2->mrr_funcs.skip_record &&
-	h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
-      continue;
-    res= h->ha_rnd_pos(table->record[0], rowid);
-    break;
-  } while (true);
- 
-  if (is_mrr_assoc)
+void Key_value_records_iterator::close()
+{
+  while (!owner->key_buffer->read() && 
+         (cur_index_tuple != last_identical_key_ptr)) {}
+}
+
+
+/**
+  DS-MRR implementation: multi_range_read_next() function.
+
+  Calling convention is like multi_range_read_next() has.
+*/
+
+int DsMrr_impl::dsmrr_next(char **range_info)
+{
+  int res;
+  while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE)
   {
-    memcpy(range_info, rowid + h->ref_length, sizeof(void*));
+    if ((res= strategy->refill_buffer()))
+      break; /* EOF or error */
   }
-end:
   return res;
+  //return strategy->get_next(range_info);
 }
 
 
 /**
   DS-MRR implementation: multi_range_read_info() function
 */
-ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
+ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, 
+                               uint key_parts,
                                uint *bufsz, uint *flags, COST_VECT *cost)
 {  
   ha_rows res;
@@ -590,8 +1260,8 @@ ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
   uint def_bufsz= *bufsz;
 
   /* Get cost/flags/mem_usage of default MRR implementation */
-  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
-                                         &def_flags, cost);
+  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, key_parts, 
+                                         &def_bufsz, &def_flags, cost);
   DBUG_ASSERT(!res);
 
   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
@@ -683,7 +1353,29 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
   return FALSE;
 }
 
-/**
+
+/*
+  Check if key/flags allow DS-MRR/CPK strategy to be used
+  
+  @param thd
+  @param keyno      Index that will be used
+  @param  mrr_flags  
+  
+  @retval TRUE   DS-MRR/CPK should be used
+  @retval FALSE  Otherwise
+*/
+
+bool DsMrr_impl::check_cpk_scan(THD *thd, uint keyno, uint mrr_flags)
+{
+  return test((mrr_flags & HA_MRR_SINGLE_POINT) &&  // check
+        //      !(mrr_flags & HA_MRR_SORTED) && 
+              keyno == table->s->primary_key && 
+              h->primary_key_is_clustered() && 
+              optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS)); //check
+}
+
+
+/*
   DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
 
   Make the choice between using Default MRR implementation and DS-MRR.
@@ -706,21 +1398,25 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
   @retval FALSE  DS-MRR implementation should be used
 */
 
+
 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
                                  uint *bufsz, COST_VECT *cost)
 {
   COST_VECT dsmrr_cost;
   bool res;
   THD *thd= current_thd;
+
+  bool doing_cpk_scan= check_cpk_scan(thd, keyno, *flags); 
+  bool using_cpk= test(keyno == table->s->primary_key &&
+                       h->primary_key_is_clustered());
   if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
-      (keyno == table->s->primary_key && h->primary_key_is_clustered()) ||
-       key_uses_partial_cols(table, keyno))
+      (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(table, keyno))
   {
     /* Use the default implementation */
     *flags |= HA_MRR_USE_DEFAULT_IMPL;
     return TRUE;
   }
-  
+
   uint add_len= table->key_info[keyno].key_length + h->ref_length; 
   *bufsz -= add_len;
   if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
@@ -744,6 +1440,10 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
     *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
     *cost= dsmrr_cost;
     res= FALSE;
+
+    if ((*flags & HA_MRR_SINGLE_POINT) && 
+         optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
+      *flags |= HA_MRR_MATERIALIZED_KEYS;
   }
   else
   {
@@ -828,17 +1528,14 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
 
 /* 
   Get cost of one sort-and-sweep step
+  
+  It consists of two parts:
+   - sort an array of #nrows ROWIDs using qsort
+   - read #nrows records from table in a sweep.
 
-  SYNOPSIS
-    get_sort_and_sweep_cost()
-      table       Table being accessed
-      nrows       Number of rows to be sorted and retrieved
-      cost   OUT  The cost
-
-  DESCRIPTION
-    Get cost of these operations:
-     - sort an array of #nrows ROWIDs using qsort
-     - read #nrows records from table in a sweep.
+  @param table       Table being accessed
+  @param nrows       Number of rows to be sorted and retrieved
+  @param cost   OUT  The cost of scan
 */
 
 static
author	Sergey Petrunya <psergey@askmonty.org>	2010-11-01 18:49:59 +0300
committer	Sergey Petrunya <psergey@askmonty.org>	2010-11-01 18:49:59 +0300
commit	6c15806b68a668b7039d8e5b5f2c6623c50bff5f (patch)
tree	d3cc5d6d70ecf2e90336b414c931c403a3461249 /sql/multi_range_read.cc
parent	d48a8b60345c5b5f95c2ce590f7032a7c9f87c4b (diff)
parent	b76a8595c611bedf512b19a7c4ccc260f0d0a8f6 (diff)
download	mariadb-git-6c15806b68a668b7039d8e5b5f2c6623c50bff5f.tar.gz