1 files changed, 626 insertions, 0 deletions
diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc
new file mode 100644
index 00000000000..865f22b431a
--- /dev/null
+++ b/sql/rowid_filter.cc
@@ -0,0 +1,626 @@
+/*
+   Copyright (c) 2018, 2019 MariaDB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
+
+#include "mariadb.h"
+#include "table.h"
+#include "sql_class.h"
+#include "opt_range.h"
+#include "rowid_filter.h"
+#include "sql_select.h"
+
+
+inline
+double Range_rowid_filter_cost_info::lookup_cost(
+                                     Rowid_filter_container_type cont_type)
+{
+  switch (cont_type) {
+  case SORTED_ARRAY_CONTAINER:
+    return log(est_elements)*0.01;
+  default:
+    DBUG_ASSERT(0);
+    return 0;
+  }
+}
+
+
+/**
+  @brief
+    The average gain in cost per row to use the range filter with this cost info
+*/
+
+inline
+double Range_rowid_filter_cost_info::avg_access_and_eval_gain_per_row(
+                                     Rowid_filter_container_type cont_type)
+{
+  return (1+1.0/TIME_FOR_COMPARE) * (1 - selectivity) -
+         lookup_cost(cont_type);
+}
+
+
+/**
+  @brief
+    The average adjusted gain in cost per row of using the filter
+
+  @param access_cost_factor the adjusted cost of access a row
+
+  @details
+    The current code to estimate the cost of a ref access is quite inconsistent:
+    in some cases the effect of page buffers is taken into account, for others
+    just the engine dependent read_time() is employed. That's why the average
+    cost of one random seek might differ from 1.
+    The parameter access_cost_factor can be considered as the cost of a random
+    seek that is used for the given ref access. Changing the cost of a random
+    seek we have to change the first coefficient in the linear formula by which
+    we calculate the gain of usage the given filter for a_adj. This function
+    calculates the value of a_adj.
+
+   @note
+     Currently we require that access_cost_factor should be a number between
+     0.0 and 1.0
+*/
+
+inline
+double Range_rowid_filter_cost_info::avg_adjusted_gain_per_row(
+				         double access_cost_factor)
+{
+  return a - (1 - access_cost_factor) * (1 - selectivity);
+}
+
+
+/**
+  @brief
+    Set the parameters used to choose the filter with the best adjusted gain
+
+  @note
+    This function must be called before the call of get_adjusted_gain()
+    for the given filter.
+*/
+
+inline void
+Range_rowid_filter_cost_info::set_adjusted_gain_param(double access_cost_factor)
+{
+  a_adj= avg_adjusted_gain_per_row(access_cost_factor);
+  cross_x_adj= b / a_adj;
+}
+
+
+/**
+  @brief
+    Initialize the cost info structure for a range filter
+
+  @param cont_type  The type of the container of the range filter
+  @param tab        The table for which the range filter is evaluated
+  @param idx        The index used to create this range filter
+*/
+
+void Range_rowid_filter_cost_info::init(Rowid_filter_container_type cont_type,
+                                        TABLE *tab, uint idx)
+{
+  container_type= cont_type;
+  table= tab;
+  key_no= idx;
+  est_elements= (ulonglong) (table->quick_rows[key_no]);
+  b= build_cost(container_type);
+  selectivity= est_elements/((double) table->stat_records());
+  a= avg_access_and_eval_gain_per_row(container_type);
+  if (a > 0)
+    cross_x= b/a;
+  else
+    cross_x= b+1;
+  abs_independent.clear_all();
+}
+
+
+/**
+  @brief
+   Return the cost of building a range filter of a certain type
+*/
+
+double
+Range_rowid_filter_cost_info::build_cost(Rowid_filter_container_type cont_type)
+{
+  double cost= 0;
+
+  cost+= table->quick_index_only_costs[key_no];
+
+  switch (cont_type) {
+
+  case SORTED_ARRAY_CONTAINER:
+    cost+= ARRAY_WRITE_COST * est_elements; /* cost filling the container */
+    cost+= ARRAY_SORT_C * est_elements * log(est_elements); /* sorting cost */
+    break;
+  default:
+    DBUG_ASSERT(0);
+  }
+
+  return cost;
+}
+
+
+Rowid_filter_container *Range_rowid_filter_cost_info::create_container()
+{
+  THD *thd= table->in_use;
+  uint elem_sz= table->file->ref_length;
+  Rowid_filter_container *res= 0;
+
+  switch (container_type) {
+  case SORTED_ARRAY_CONTAINER:
+    res= new (thd->mem_root) Rowid_filter_sorted_array((uint) est_elements,
+                                                       elem_sz);
+    break;
+  default:
+    DBUG_ASSERT(0);
+  }
+  return res;
+}
+
+
+static
+int compare_range_rowid_filter_cost_info_by_a(
+                        Range_rowid_filter_cost_info **filter_ptr_1,
+                        Range_rowid_filter_cost_info **filter_ptr_2)
+{
+  double diff= (*filter_ptr_2)->get_a() - (*filter_ptr_1)->get_a();
+  return (diff < 0 ? -1 : (diff > 0 ? 1 : 0));
+}
+
+
+/**
+  @brief
+    Prepare the array with cost info on range filters to be used by optimizer
+
+  @details
+    The function removes the array of cost info on range filters the elements
+    for those range filters that won't be ever chosen as the best filter, no
+    matter what index will be used to access the table and at what step the
+    table will be joined.
+*/
+
+void TABLE::prune_range_rowid_filters()
+{
+  /*
+    For the elements of the array with cost info on range filters
+    build a bit matrix of absolutely independent elements.
+    Two elements are absolutely independent if they such indexes that
+    there is no other index that overlaps both of them or is constraint
+    correlated with both of them. Use abs_independent key maps to store
+    the elements if this bit matrix.
+  */
+
+  Range_rowid_filter_cost_info **filter_ptr_1= range_rowid_filter_cost_info_ptr;
+  for (uint i= 0;
+       i < range_rowid_filter_cost_info_elems;
+       i++, filter_ptr_1++)
+  {
+    uint key_no= (*filter_ptr_1)->key_no;
+    Range_rowid_filter_cost_info **filter_ptr_2= filter_ptr_1 + 1;
+    for (uint j= i+1;
+         j < range_rowid_filter_cost_info_elems;
+         j++, filter_ptr_2++)
+    {
+      key_map map_1= key_info[key_no].overlapped;
+      map_1.merge(key_info[key_no].constraint_correlated);
+      key_map map_2= key_info[(*filter_ptr_2)->key_no].overlapped;
+      map_2.merge(key_info[(*filter_ptr_2)->key_no].constraint_correlated);
+      map_1.intersect(map_2);
+      if (map_1.is_clear_all())
+      {
+        (*filter_ptr_1)->abs_independent.set_bit((*filter_ptr_2)->key_no);
+        (*filter_ptr_2)->abs_independent.set_bit(key_no);
+      }
+    }
+  }
+
+  /* Sort the array range_filter_cost_info by 'a' in descending order */
+  my_qsort(range_rowid_filter_cost_info_ptr,
+           range_rowid_filter_cost_info_elems,
+           sizeof(Range_rowid_filter_cost_info *),
+           (qsort_cmp) compare_range_rowid_filter_cost_info_by_a);
+
+  /*
+    For each element check whether it is created for the filter that
+    can be ever chosen as the best one. If it's not the case remove
+    from the array. Otherwise put it in the array in such a place
+    that all already checked elements left the array are ordered by
+    cross_x.
+  */
+
+  Range_rowid_filter_cost_info **cand_filter_ptr=
+                                   range_rowid_filter_cost_info_ptr;
+  for (uint i= 0;
+       i < range_rowid_filter_cost_info_elems;
+       i++, cand_filter_ptr++)
+  {
+    bool is_pruned= false;
+    Range_rowid_filter_cost_info **usable_filter_ptr=
+                                     range_rowid_filter_cost_info_ptr;
+    key_map abs_indep;
+    abs_indep.clear_all();
+    for (uint j= 0; j < i; j++, usable_filter_ptr++)
+    {
+      if ((*cand_filter_ptr)->cross_x >= (*usable_filter_ptr)->cross_x)
+      {
+        if (abs_indep.is_set((*usable_filter_ptr)->key_no))
+	{
+          /*
+            The following is true here for the element e being checked:
+            There are at 2 elements e1 and e2 among already selected such that
+            e1.cross_x < e.cross_x and e1.a > e.a
+            and
+            e2.cross_x < e_cross_x and e2.a > e.a,
+            i.e. the range filters f1, f2 of both e1 and e2 always promise
+            better gains then the range filter of e.
+            As e1 and e2 are absolutely independent one of the range filters
+            f1, f2 will be always a better choice than f1 no matter what index
+            is chosen to access the table. Because of this the element e
+            can be safely removed from the array.
+	  */
+
+	  is_pruned= true;
+          break;
+        }
+        abs_indep.merge((*usable_filter_ptr)->abs_independent);
+      }
+      else
+      {
+        /*
+          Move the element being checked to the proper position to have all
+          elements that have been already checked to be sorted by cross_x
+	*/
+        Range_rowid_filter_cost_info *moved= *cand_filter_ptr;
+        memmove(usable_filter_ptr+1, usable_filter_ptr,
+                sizeof(Range_rowid_filter_cost_info *) * (i-j-1));
+        *usable_filter_ptr= moved;
+      }
+    }
+    if (is_pruned)
+    {
+      /* Remove the checked element from the array */
+      memmove(cand_filter_ptr, cand_filter_ptr+1,
+              sizeof(Range_rowid_filter_cost_info *) *
+              (range_rowid_filter_cost_info_elems - 1 - i));
+      range_rowid_filter_cost_info_elems--;
+    }
+  }
+}
+
+
+/**
+   @brief
+     Return maximum number of elements that a container allowed to have
+ */
+
+static ulonglong
+get_max_range_rowid_filter_elems_for_table(
+                                 THD *thd, TABLE *tab,
+                                 Rowid_filter_container_type cont_type)
+{
+  switch (cont_type) {
+  case SORTED_ARRAY_CONTAINER :
+    return thd->variables.max_rowid_filter_size/tab->file->ref_length;
+  default :
+    DBUG_ASSERT(0);
+    return 0;
+  }
+}
+
+
+/**
+  @brief
+    Prepare info on possible range filters used by optimizer
+
+  @param table    The thread handler
+
+  @details
+    The function first selects the indexes of the table that potentially
+    can be used for range filters and allocates an array of the objects
+    of the Range_rowid_filter_cost_info type to store cost info on
+    possible range filters and an array of pointers to these objects.
+    The latter is created for easy sorting of the objects with cost info
+    by different sort criteria. Then the function initializes the allocated
+    array with cost info for each possible range filter. After this
+    the function calls the method TABLE::prune_range_rowid_filters().
+    The method removes the elements of the array for the filters that
+    promise less gain then others remaining in the array in any situation
+    and optimizes the order of the elements for faster choice of the best
+    range filter.
+*/
+
+void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd)
+{
+  uint key_no;
+  key_map usable_range_filter_keys;
+  usable_range_filter_keys.clear_all();
+  key_map::Iterator it(quick_keys);
+
+  /*
+    From all indexes that can be used for range accesses select only such that
+    - range filter pushdown is supported by the engine for them     (1)
+    - they are not clustered primary                                (2)
+    - the range filter containers for them are not too large        (3)
+  */
+  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
+  {
+    if (!(file->index_flags(key_no, 0, 1) & HA_DO_RANGE_FILTER_PUSHDOWN))  // !1
+      continue;
+    if (key_no == s->primary_key && file->primary_key_is_clustered())      // !2
+      continue;
+   if (quick_rows[key_no] >
+       get_max_range_rowid_filter_elems_for_table(thd, this,
+                                                  SORTED_ARRAY_CONTAINER)) // !3
+      continue;
+    usable_range_filter_keys.set_bit(key_no);
+  }
+
+  /*
+    Allocate an array of objects to store cost info for the selected filters
+    and allocate an array of pointers to these objects
+  */
+
+  range_rowid_filter_cost_info_elems= usable_range_filter_keys.bits_set();
+  if (!range_rowid_filter_cost_info_elems)
+    return;
+
+  range_rowid_filter_cost_info_ptr=
+    (Range_rowid_filter_cost_info **)
+      thd->calloc(sizeof(Range_rowid_filter_cost_info *) *
+                  range_rowid_filter_cost_info_elems);
+  range_rowid_filter_cost_info=
+    new (thd->mem_root)
+      Range_rowid_filter_cost_info[range_rowid_filter_cost_info_elems];
+  if (!range_rowid_filter_cost_info_ptr || !range_rowid_filter_cost_info)
+  {
+    range_rowid_filter_cost_info_elems= 0;
+    return;
+  }
+
+  /* Fill the allocated array with cost info on the selected range filters */
+
+  Range_rowid_filter_cost_info **curr_ptr= range_rowid_filter_cost_info_ptr;
+  Range_rowid_filter_cost_info *curr_filter_cost_info=
+                                                 range_rowid_filter_cost_info;
+
+  key_map::Iterator li(usable_range_filter_keys);
+  while ((key_no= li++) != key_map::Iterator::BITMAP_END)
+  {
+    *curr_ptr= curr_filter_cost_info;
+    curr_filter_cost_info->init(SORTED_ARRAY_CONTAINER, this, key_no);
+    curr_ptr++;
+    curr_filter_cost_info++;
+  }
+
+  prune_range_rowid_filters();
+}
+
+
+/**
+  @brief
+    Choose the best range filter for the given access of the table
+
+  @param access_key_no    The index by which the table is accessed
+  @param records   The estimated total number of key tuples with this access
+  @param access_cost_factor the cost of a random seek to access the table
+
+  @details
+    The function looks through the array of cost info for range filters
+    and chooses the element for the range filter that promise the greatest
+    gain with the the ref or range access of the table by access_key_no.
+    As the array is sorted by cross_x in ascending order the function stops
+    the look through as soon as it reaches the first element with
+    cross_x_adj > records because the range filter for this element and the
+    range filters for all remaining elements do not promise positive gains.
+
+  @note
+    It is easy to see that if cross_x[i] > cross_x[j] then
+    cross_x_adj[i] > cross_x_adj[j]
+
+  @retval  Pointer to the cost info for the range filter that promises
+           the greatest gain, NULL if there is no such range filter
+*/
+
+Range_rowid_filter_cost_info *
+TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
+                                                double records,
+                                                double access_cost_factor)
+{
+  if (range_rowid_filter_cost_info_elems == 0 ||
+      covering_keys.is_set(access_key_no))
+    return 0;
+
+  /*
+    Currently we do not support usage of range filters if the table
+    is accessed by the clustered primary key. It does not make sense
+    if a full key is used. If the table is accessed by a partial
+    clustered primary key it would, but the current InnoDB code does not
+    allow it. Later this limitation will be lifted
+  */
+  if (access_key_no == s->primary_key && file->primary_key_is_clustered())
+    return 0;
+
+  Range_rowid_filter_cost_info *best_filter= 0;
+  double best_filter_gain= 0;
+
+  key_map no_filter_usage= key_info[access_key_no].overlapped;
+  no_filter_usage.merge(key_info[access_key_no].constraint_correlated);
+  for (uint i= 0; i < range_rowid_filter_cost_info_elems ;  i++)
+  {
+    double curr_gain = 0;
+    Range_rowid_filter_cost_info *filter= range_rowid_filter_cost_info_ptr[i];
+
+    /*
+      Do not use a range filter that uses an in index correlated with
+      the index by which the table is accessed
+    */
+    if ((filter->key_no == access_key_no) ||
+        no_filter_usage.is_set(filter->key_no))
+      continue;
+
+    filter->set_adjusted_gain_param(access_cost_factor);
+
+    if (records < filter->cross_x_adj)
+    {
+      /* Does not make sense to look through the remaining filters */
+      break;
+    }
+
+    curr_gain= filter->get_adjusted_gain(records);
+    if (best_filter_gain < curr_gain)
+    {
+      best_filter_gain= curr_gain;
+      best_filter= filter;
+    }
+  }
+  return best_filter;
+}
+
+
+/**
+  @brief
+    Fill the range rowid filter performing the associated range index scan
+
+  @details
+    This function performs the range index scan associated with this
+    range filter and place into the filter the rowids / primary keys
+    read from key tuples when doing this scan.
+  @retval
+    false  on success
+    true   otherwise
+
+  @note
+    The function assumes that the quick select object to perform
+    the index range scan has been already created.
+
+  @note
+    Currently the same table handler is used to access the joined table
+    and to perform range index scan filling the filter.
+    In the future two different handlers will be used for this
+    purposes to facilitate a lazy building of the filter.
+*/
+
+bool Range_rowid_filter::fill()
+{
+  int rc= 0;
+  handler *file= table->file;
+  THD *thd= table->in_use;
+  QUICK_RANGE_SELECT* quick= (QUICK_RANGE_SELECT*) select->quick;
+
+  uint table_status_save= table->status;
+  Item *pushed_idx_cond_save= file->pushed_idx_cond;
+  uint pushed_idx_cond_keyno_save= file->pushed_idx_cond_keyno;
+  bool in_range_check_pushed_down_save= file->in_range_check_pushed_down;
+
+  table->status= 0;
+  file->pushed_idx_cond= 0;
+  file->pushed_idx_cond_keyno= MAX_KEY;
+  file->in_range_check_pushed_down= false;
+
+  /* We're going to just read rowids / primary keys */
+  table->prepare_for_position();
+
+  table->file->ha_start_keyread(quick->index);
+
+  if (quick->init() || quick->reset())
+    rc= 1;
+
+  while (!rc)
+  {
+    rc= quick->get_next();
+    if (thd->killed)
+      rc= 1;
+    if (!rc)
+    {
+      file->position(quick->record);
+      if (container->add(NULL, (char*) file->ref))
+        rc= 1;
+      else
+        tracker->increment_container_elements_count();
+    }
+  }
+
+  quick->range_end();
+  table->file->ha_end_keyread();
+
+  table->status= table_status_save;
+  file->pushed_idx_cond= pushed_idx_cond_save;
+  file->pushed_idx_cond_keyno= pushed_idx_cond_keyno_save;
+  file->in_range_check_pushed_down= in_range_check_pushed_down_save;
+  tracker->report_container_buff_size(table->file->ref_length);
+
+  if (rc != HA_ERR_END_OF_FILE)
+    return 1;
+  table->file->rowid_filter_is_active= true;
+  return 0;
+}
+
+
+/**
+  @brief
+    Binary search in the sorted array of a rowid filter
+
+  @param ctxt   context of the search
+  @parab elem   rowid / primary key to look for
+
+  @details
+    The function looks for the rowid / primary key ' elem' in this container
+    assuming that ctxt contains a pointer to the TABLE structure created
+    for the table to whose row elem refers to.
+
+  @retval
+    true    elem is found in the container
+    false   otherwise
+*/
+
+bool Rowid_filter_sorted_array::check(void *ctxt, char *elem)
+{
+  TABLE *table= (TABLE *) ctxt;
+  if (!is_checked)
+  {
+    refpos_container.sort(refpos_order_cmp, (void *) (table->file));
+    is_checked= true;
+  }
+  int l= 0;
+  int r= refpos_container.elements()-1;
+  while (l <= r)
+  {
+    int m= (l + r) / 2;
+    int cmp= refpos_order_cmp((void *) (table->file),
+                              refpos_container.get_pos(m), elem);
+    if (cmp == 0)
+      return true;
+    if (cmp < 0)
+      l= m + 1;
+    else
+      r= m-1;
+  }
+  return false;
+}
+
+
+Range_rowid_filter::~Range_rowid_filter()
+{
+  delete container;
+  container= 0;
+  if (select)
+  {
+    if (select->quick)
+    {
+      delete select->quick;
+      select->quick= 0;
+    }
+    delete select;
+    select= 0;
+  }
+}