From 42fea34d4a9f4ed1ae87cf8494f07bcdfcc49e83 Mon Sep 17 00:00:00 2001
From: Igor Babaev <igor@askmonty.org>
Date: Mon, 20 Dec 2021 19:55:00 -0800
Subject: MDEV-27262 Unexpected index intersection with full index scan for an
 index

If when extracting a range condition for an index from the WHERE condition
Range Optimizer sees that the range condition covers the whole index then
such condition should be discarded because it cannot be used in any range
scan. In some cases Range Optimizer really does it, but there remained some
conditions for which it was not done. As a result the optimizer could
produce index merge plans with the full index scan for one of the indexes
participating in the index merge.
This could be observed in one of the test cases from index_merge1.inc
where a plan with index_merge_sort_union was produced and in the test case
reported for this bug where a plan with index_merge_sort_intersect was
produced. In both cases one of two index scans participating in index merge
ran over the whole index.
The patch slightly changes the original above mentioned test case from
index_merge1.inc to be able to produce an intended plan employing
index_merge_sort_union. The original query was left to show that index
merge is not used for it anymore.
It should be noted that for the plan with index_merge_sort_intersect could
be chosen for execution only due to a defect in the InnoDB code that
returns wrong estimates for the cardinality of big ranges.

This bug led to serious problems in 10.4+ where the optimization using
Rowid filters is employed (see mdev-26446).

Approved by Sergey Petrunia <sergey@mariadb.com>
---
 sql/opt_range.cc | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'sql/opt_range.cc')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index f3f184367c9..89ee8cf98e1 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -9329,7 +9329,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
 
   uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
 
-  for (key2=key2->first(); key2; )
+  for (key2=key2->first(); ; )
   {
     /*
       key1 consists of one or more ranges. tmp is the range currently
@@ -9343,6 +9343,16 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
                   ^
                   tmp
     */
+    if (key1->min_flag & NO_MIN_RANGE &&
+        key1->max_flag & NO_MAX_RANGE)
+    {
+      if (key1->maybe_flag)
+        return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+      return 0;   // Always true OR
+    }
+    if (!key2)
+      break;
+
     SEL_ARG *tmp=key1->find_range(key2);
 
     /*
@@ -9413,6 +9423,13 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
         key2->copy_min(tmp);
         if (!(key1=key1->tree_delete(tmp)))
         {                                       // Only one key in tree
+          if (key2->min_flag & NO_MIN_RANGE &&
+              key2->max_flag & NO_MAX_RANGE)
+          {
+            if (key2->maybe_flag)
+              return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+            return 0;   // Always true OR
+          }
           key1=key2;
           key1->make_root();
           key2=key2_next;
-- 
cgit v1.2.1


From c18896f9c1ce6e4b9a8519a2d5155698d82ae45a Mon Sep 17 00:00:00 2001
From: Monty <monty@mariadb.org>
Date: Mon, 27 Dec 2021 18:51:00 +0200
Subject: MDEV-14907 FEDERATEDX doesn't respect DISTINCT

Federated and Federatex cannot be used with ROR scans

Federated::position() and Federatex::position() is storing in 'ref' a
pointer into a local result set buffer. This means that one cannot
compare 'ref' from different handler instances to see if they point to the
same physical record.

This bug caused federated.federatedx to return wrong results when the
optimizer tried to use index_merge to resolve some queries.

Fixed by introducing table flag HA_NON_COMPARABLE_ROWID and using this
with the above handlers.

Todo:
- Fix multi_delete(), multi_update and read_records() to use primary key
  instead of 'ref' if case HA_NON_COMPARABLE_ROWID is set. The current
  code only works if we have only one range (like table scan) for the
  tables that will be updated in the second pass.
- Enable DBUG_ASSERT() in ha_federated::cmp_ref() and
  ha_federatedx::cmp_ref().
---
 sql/opt_range.cc | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'sql/opt_range.cc')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 5f13c9b3441..d3104019edb 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2678,6 +2678,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
   records= head->stat_records();
   if (!records)
     records++;					/* purecov: inspected */
+  if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
+    only_single_index_range_scan= 1;
 
   if (head->force_index || force_quick_range)
     scan_time= read_time= DBL_MAX;
@@ -11312,6 +11314,9 @@ static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
                                 table_key->user_defined_key_parts);
   uint pk_number;
   
+  if (param->table->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
+    return false;
+
   for (KEY_PART_INFO *kp= table_key->key_part; kp < key_part; kp++)
   {
     uint16 fieldnr= param->table->key_info[keynr].
-- 
cgit v1.2.1


From fdec8852014960b33b302fc2467cc535eff78186 Mon Sep 17 00:00:00 2001
From: Monty <monty@mariadb.org>
Date: Wed, 19 Jan 2022 18:34:45 +0200
Subject: MDEV-25830 optimizer_use_condition_selectivity=4 sometimes produces
 worse plan than optimizer_use_condition_selectivity=1

The issue was that calc_cond_selectivity_for_table prefered ranges with
many parts and when deciding on which selectivity to use.

Fixed by going through ranges according to the number of rows in the range.

This ensures that selectivity from ranges with few rows will be prefered
over ranges with many rows for indexes that uses the same columns.
---
 sql/opt_range.cc | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

(limited to 'sql/opt_range.cc')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index d3104019edb..7599401dcb4 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3249,6 +3249,25 @@ double records_in_column_ranges(PARAM *param, uint idx,
 } 
 
 
+/*
+  Compare quick select ranges according to number of found rows
+  If there is equal amounts of rows, use the long key part.
+  The idea is that if we have keys (a),(a,b) and (a,b,c) and we have
+  a query like WHERE a=1 and b=1 and c=1,
+  it is better to use key (a,b,c) than (a) as it will ensure we don't also
+  use histograms for columns b and c
+*/
+
+static
+int cmp_quick_ranges(TABLE *table, uint *a, uint *b)
+{
+  int tmp= CMP_NUM(table->quick_rows[*a], table->quick_rows[*b]);
+  if (tmp)
+    return tmp;
+  return -CMP_NUM(table->quick_key_parts[*a], table->quick_key_parts[*b]);
+}
+
+
 /*
   Calculate the selectivity of the condition imposed on the rows of a table
 
@@ -3285,10 +3304,10 @@ double records_in_column_ranges(PARAM *param, uint idx,
 
 bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
 {
-  uint keynr;
-  uint max_quick_key_parts= 0;
+  uint keynr, range_index, ranges;
   MY_BITMAP *used_fields= &table->cond_set;
-  double table_records= (double)table->stat_records(); 
+  double table_records= (double)table->stat_records();
+  uint optimal_key_order[MAX_KEY];
   DBUG_ENTER("calculate_cond_selectivity_for_table");
 
   table->cond_selectivity= 1.0;
@@ -3327,23 +3346,21 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
   Json_writer_object trace_wrapper(thd);
   Json_writer_array selectivity_for_indexes(thd, "selectivity_for_indexes");
 
-  for (keynr= 0;  keynr < table->s->keys; keynr++)
-  {
+  /*
+    Walk through all quick ranges in the order of least found rows.
+  */
+  for (ranges= keynr= 0 ; keynr < table->s->keys; keynr++)
     if (table->quick_keys.is_set(keynr))
-      set_if_bigger(max_quick_key_parts, table->quick_key_parts[keynr]);
-  }
+      optimal_key_order[ranges++]= keynr;
 
-  /* 
-    Walk through all indexes, indexes where range access uses more keyparts 
-    go first.
-  */
-  for (uint quick_key_parts= max_quick_key_parts;
-       quick_key_parts; quick_key_parts--)
+  my_qsort2(optimal_key_order, ranges,
+            sizeof(optimal_key_order[0]),
+            (qsort2_cmp) cmp_quick_ranges, table);
+
+  for (range_index= 0 ; range_index < ranges ; range_index++)
   {
-    for (keynr= 0;  keynr < table->s->keys; keynr++)
+    uint keynr= optimal_key_order[range_index];
     {
-      if (table->quick_keys.is_set(keynr) &&
-          table->quick_key_parts[keynr] == quick_key_parts)
       {
         uint i;
         uint used_key_parts= table->quick_key_parts[keynr];
-- 
cgit v1.2.1