From ef4d09948d5ff38f5dff8974005ba222a4b18462 Mon Sep 17 00:00:00 2001
From: Igor Babaev <igor@askmonty.org>
Date: Tue, 11 Apr 2023 21:21:45 -0700
Subject: MDEV-20773 Error from UPDATE when estimating selectivity of a range

This bug could affect multi-update statements as well as single-table
update statements processed as multi-updates when the where condition
contained a range condition over a non-indexed varchar column. The
optimizer calculates selectivity of such range conditions using histograms.
For each range the buckets containing endpoints of the the range are
determined with a procedure that stores the values of the endpoints in the
space of the record buffer where values of the columns are usually stored.
For a range over a varchar column the value of a endpoint may exceed the
size of the buffer and in such case the value is stored with truncation.
This truncations cannot affect the result of the calculation of the range
selectivity as the calculation employes only the beginning of the value
string. However it can trigger generation of an unexpected error on this
truncation if an update statement is processed.
This patch prohibits truncation messages when selectivity of a range
condition is calculated for a non-indexed column.

Approved by Oleksandr Byelkin <sanja@mariadb.com>
---
 sql/opt_range.cc | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'sql/opt_range.cc')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index c34420181a2..69a95f8da44 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3518,7 +3518,10 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
         }          
         else
         {
+          enum_check_fields save_count_cuted_fields= thd->count_cuted_fields;
+          thd->count_cuted_fields= CHECK_FIELD_IGNORE;
           rows= records_in_column_ranges(&param, idx, key);
+          thd->count_cuted_fields= save_count_cuted_fields;
           if (rows != DBL_MAX)
           {
             key->field->cond_selectivity= rows/table_records;
-- 
cgit v1.2.1


From be7ef6566fab6088b5222eae184226ed6b5994d3 Mon Sep 17 00:00:00 2001
From: Sergei Petrunia <sergey@mariadb.com>
Date: Tue, 28 Mar 2023 10:25:59 +0300
Subject: MDEV-30605: Wrong result while using index for group-by

A GROUP BY query which uses "MIN(pk)" and has "pk<>const" in the
WHERE clause would produce wrong result when handled with "Using index
for group-by".  Here "pk" column is the table's primary key.

The problem was introduced by fix for MDEV-23634. It made the range
optimizer to not produce ranges for conditions in form "pk != const".

However, LooseScan code requires that the optimizer is able to
convert the condition on the MIN/MAX column into an equivalent range.
The range is used to locate the row that has the MIN/MAX value.

LooseScan checks this in check_group_min_max_predicates(). This fix
makes the code in that function to take into account that "pk != const"
does not produce a range.
---
 sql/opt_range.cc | 46 ++++++++++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 18 deletions(-)

(limited to 'sql/opt_range.cc')

diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 223799a3235..82b19e23fd4 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -461,7 +461,7 @@ void print_range_for_non_indexed_field(String *out, Field *field,
 static void print_min_range_operator(String *out, const ha_rkey_function flag);
 static void print_max_range_operator(String *out, const ha_rkey_function flag);
 
-static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field);
+static bool is_field_an_unique_index(Field *field);
 
 /*
   SEL_IMERGE is a list of possible ways to do index merge, i.e. it is
@@ -7752,8 +7752,13 @@ SEL_TREE *Item_func_ne::get_func_mm_tree(RANGE_OPT_PARAM *param,
     If this condition is a "col1<>...", where there is a UNIQUE KEY(col1),
     do not construct a SEL_TREE from it. A condition that excludes just one
     row in the table is not selective (unless there are only a few rows)
+
+    Note: this logic must be in sync with code in
+    check_group_min_max_predicates(). That function walks an Item* condition
+    and checks if the range optimizer would produce an equivalent range for
+    it.
   */
-  if (is_field_an_unique_index(param, field))
+  if (param->using_real_indexes && is_field_an_unique_index(field))
     DBUG_RETURN(NULL);
   DBUG_RETURN(get_ne_mm_tree(param, field, value, value));
 }
@@ -7865,7 +7870,7 @@ SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param,
          - if there are a lot of constants, the overhead of building and
            processing enormous range list is not worth it.
       */
-      if (is_field_an_unique_index(param, field))
+      if (param->using_real_indexes && is_field_an_unique_index(field))
         DBUG_RETURN(0);
 
       /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval.  */
@@ -8574,24 +8579,18 @@ SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
     In the future we could also add "almost unique" indexes where any value is
     present only in a few rows (but necessarily exactly one row)
 */
-static bool is_field_an_unique_index(RANGE_OPT_PARAM *param, Field *field)
+static bool is_field_an_unique_index(Field *field)
 {
   DBUG_ENTER("is_field_an_unique_index");
-
-  // The check for using_real_indexes is there because of the heuristics
-  // this function is used for.
-  if (param->using_real_indexes)
+  key_map::Iterator it(field->key_start);
+  uint key_no;
+  while ((key_no= it++) != key_map::Iterator::BITMAP_END)
   {
-    key_map::Iterator it(field->key_start);
-    uint key_no;
-    while ((key_no= it++) != key_map::Iterator::BITMAP_END)
+    KEY *key_info= &field->table->key_info[key_no];
+    if (key_info->user_defined_key_parts == 1 &&
+        (key_info->flags & HA_NOSAME))
     {
-      KEY *key_info= &field->table->key_info[key_no];
-      if (key_info->user_defined_key_parts == 1 &&
-          (key_info->flags & HA_NOSAME))
-      {
-        DBUG_RETURN(true);
-      }
+      DBUG_RETURN(true);
     }
   }
   DBUG_RETURN(false);
@@ -13475,7 +13474,7 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
          - (C between const_i and const_j)
          - C IS NULL
          - C IS NOT NULL
-         - C != const
+         - C != const  (unless C is the primary key)
     SA4. If Q has a GROUP BY clause, there are no other aggregate functions
          except MIN and MAX. For queries with DISTINCT, aggregate functions
          are allowed.
@@ -14358,6 +14357,17 @@ check_group_min_max_predicates(Item *cond, Item_field *min_max_arg_item,
         if (!simple_pred(pred, args, &inv))
           DBUG_RETURN(FALSE);
 
+        /*
+          Follow the logic in Item_func_ne::get_func_mm_tree(): condition
+          in form "tbl.primary_key <> const" is not used to produce intervals.
+
+          If the condition doesn't have an equivalent interval, this means we
+          fail LooseScan's condition SA3. Return FALSE to indicate this.
+        */
+        if (pred_type == Item_func::NE_FUNC &&
+            is_field_an_unique_index(min_max_arg_item->field))
+          DBUG_RETURN(FALSE);
+
         if (args[0] && args[1]) // this is a binary function or BETWEEN
         {
           DBUG_ASSERT(pred->fixed_type_handler());
-- 
cgit v1.2.1