Addressing review

author: Varun Gupta <varun.gupta@mariadb.com> 2021-01-09 00:36:13 +0530
committer: Varun Gupta <varun.gupta@mariadb.com> 2021-01-11 13:23:06 +0530
commit: 7220e3719d312e194c0ce00c0c66a770360835fa (patch)
tree: b5ad8ec9efebd4ca671e5f54acee4a4e1a17fc39
parent: 8b7ad4e69750dcf6663203de5f4a1a082bf383be (diff)
download: mariadb-git-7220e3719d312e194c0ce00c0c66a770360835fa.tar.gz
9 files changed, 87 insertions, 70 deletions
diff --git a/mysql-test/include/join_cardinality.inc b/mysql-test/include/join_cardinality.inc
index 1ab6edb75cb..8d258a95324 100644
--- a/mysql-test/include/join_cardinality.inc
+++ b/mysql-test/include/join_cardinality.inc
@@ -43,7 +43,7 @@ FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
 
 --echo #
 --echo # The cardinality here would be not accurate because we don't have
---echo # selectivity for the predicate t1.b=10
+--echo # selectivity for the predicate t1.c=10
 --echo #
 
 EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
@@ -129,3 +129,8 @@ EXPLAIN SELECT * from t1
 WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1);
 SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
 FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t2.b > 10 limit 1) AND abs(t1.b)= 11;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
index edcbf2f04e7..b74ede7939c 100644
--- a/mysql-test/main/join_cardinality.result
+++ b/mysql-test/main/join_cardinality.result
@@ -1,10 +1,10 @@
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_trace=1;
+SET optimizer_switch='rowid_filter=off';
 CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a));
 INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
 CREATE TABLE t2(a INT, b INT, key(b));
 INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
-SET @save_optimizer_switch=@@optimizer_switch;
-SET optimizer_trace=1;
-SET optimizer_switch='rowid_filter=off';
 #
 # RUNNING the join cardinality tests when statistics are
 # available only from indexes
@@ -98,7 +98,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
 ]
 #
 # The cardinality here would be not accurate because we don't have
-# selectivity for the predicate t1.b=10
+# selectivity for the predicate t1.c=10
 #
 EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -264,6 +264,18 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
     false,
     false
 ]
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t2.b > 10 limit 1) AND abs(t1.b)= 11;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ref	a	a	5	const	1	Using where
+2	SUBQUERY	t2	range	b	b	5	NULL	95	Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+    true,
+    false
+]
 DROP TABLE t1,t2;
 CREATE TABLE t1(a INT, b INT, c INT);
 INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
@@ -362,7 +374,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
 ]
 #
 # The cardinality here would be not accurate because we don't have
-# selectivity for the predicate t1.b=10
+# selectivity for the predicate t1.c=10
 #
 EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -528,6 +540,18 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
     false,
     false
 ]
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t2.b > 10 limit 1) AND abs(t1.b)= 11;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	100	Using where
+2	SUBQUERY	t2	ALL	NULL	NULL	NULL	NULL	100	Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+    true,
+    false
+]
 DROP TABLE t1,t2;
 #
 # Combination with statistics from stat tables and
@@ -820,7 +844,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
 DROP TABLE t1,t2;
 DROP VIEW v1;
 #
-# TESTS to get ndv from rec_per_key 
+# TESTS to get Number of distinct values(ndv) from rec_per_key 
 #
 CREATE TABLE t1 (a INT, b INT);
 INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10;
diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test
index 696025bdecf..d0f8b74e99b 100644
--- a/mysql-test/main/join_cardinality.test
+++ b/mysql-test/main/join_cardinality.test
@@ -1,16 +1,16 @@
 --source include/have_sequence.inc
 
+# Default settings for all following tests
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_trace=1;
+SET optimizer_switch='rowid_filter=off';
+
 CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a));
 INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
 
 CREATE TABLE t2(a INT, b INT, key(b));
 INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
 
-
-SET @save_optimizer_switch=@@optimizer_switch;
-SET optimizer_trace=1;
-SET optimizer_switch='rowid_filter=off';
-
 --echo #
 --echo # RUNNING the join cardinality tests when statistics are
 --echo # available only from indexes
@@ -224,7 +224,7 @@ DROP VIEW v1;
 
 
 --echo #
---echo # TESTS to get ndv from rec_per_key 
+--echo # TESTS to get Number of distinct values(ndv) from rec_per_key
 --echo #
 
 CREATE TABLE t1 (a INT, b INT);
diff --git a/sql/field.cc b/sql/field.cc
index 5e4d29045a3..51938bab9b3 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -11369,7 +11369,7 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length)
     If the column is the first component of a key, then statistics
     for the column are available from the range optimizer.
     Sets the bit in Field::stats_table
-      a) NDV is available
+      a)  Number of distinct values(NDV) is available
       b) Statistics are available for the non-const argument of a
          range predicate
 */
@@ -11381,15 +11381,12 @@ void Field::statistics_available_via_keys()
   while ((key= it++) != key_map::Iterator::BITMAP_END)
   {
     KEY *keyinfo= table->key_info + key;
-    if (keyinfo->usable_key_parts == 1 &&
-        field_index + 1 == keyinfo->key_part->fieldnr)
+    DBUG_ASSERT(field_index + 1 == keyinfo->key_part->fieldnr);
+    stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+    if (keyinfo->actual_rec_per_key(0))
     {
-      stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
-      if (keyinfo->actual_rec_per_key(0))
-      {
-        stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
-        return;
-      }
+      stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+      return;
     }
   }
 }
@@ -11402,11 +11399,12 @@ void Field::statistics_available_via_keys()
 
 void Field::statistics_available_via_stat_tables()
 {
-  if (!(read_stats && !read_stats->no_stat_values_provided()))
-    return;
-  stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
-  if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))
-    stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+  if (read_stats && !read_stats->no_stat_values_provided())
+  {
+    stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+    if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))
+      stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+  }
 }
 
 
@@ -11414,14 +11412,19 @@ void Field::statistics_available_via_stat_tables()
   @brief
     Check if statistics for a column are available via indexes or stat tables
 
+  @details
+    This function checks if there are statistics for a column via indexes
+    or stat tables. Also if the member Field::stats_available
+    has not been updated then update it
+
   @retval
     TRUE   : statistics available for the column
     FALSE  : OTHERWISE
 */
 
-bool Field::is_statistics_available()
+bool Field::is_range_statistics_available()
 {
-  if (!(stats_available & (1 << STATISTICS_CACHED)))
+  if (!stats_available)
   {
     statistics_available_via_keys();
     statistics_available_via_stat_tables();
@@ -11433,7 +11436,8 @@ bool Field::is_statistics_available()
 
 /*
   @brief
-    Check if ndv for a column are available via indexes or stat tables
+    Check if number of distinct values (NDV) for a column are available
+    via indexes or stat tables
 
   @retval
     TRUE   : ndv available for the column
@@ -11442,12 +11446,11 @@ bool Field::is_statistics_available()
 
 bool Field::is_ndv_available()
 {
-  if (!(stats_available & (1 << STATISTICS_CACHED)))
+  if (!stats_available)
   {
-    bool res= is_ndv_available_via_keys() ||
-              is_ndv_available_via_stat_tables();
     stats_available|= (1 << STATISTICS_CACHED);
-    return res;
+    return is_ndv_available_via_keys() ||
+           is_ndv_available_via_stat_tables();
   }
   return (stats_available & (1 << STATISTICS_FOR_NDV_AVAILABLE));
 }
@@ -11469,7 +11472,7 @@ bool Field::is_ndv_available_via_keys()
   while ((key= it++) != key_map::Iterator::BITMAP_END)
   {
     KEY *keyinfo= table->key_info + key;
-    if (is_first_component_of_key(keyinfo) && keyinfo->actual_rec_per_key(0))
+    if (keyinfo->actual_rec_per_key(0))
     {
       stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
       return true;
@@ -11490,30 +11493,13 @@ bool Field::is_ndv_available_via_keys()
 
 bool Field::is_ndv_available_via_stat_tables()
 {
-  if (!(read_stats && !read_stats->no_stat_values_provided() &&
+  if ((read_stats && !read_stats->no_stat_values_provided() &&
         !read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)))
-    return false;
-  stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
-  return true;
-}
-
-
-/*
-  @brief
-    Checks if a field is the first component of a given key
-
-  @param
-    key      given key
-
-  @retval
-    TRUE     : field is the first component of the given key
-    FALSE    : otherwise
-*/
-
-bool Field::is_first_component_of_key(KEY *key)
-{
-  DBUG_ASSERT(key->usable_key_parts >= 1);
-  return field_index + 1 == key->key_part->fieldnr;
+  {
+    stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+    return true;
+  }
+  return false;
 }
 
 
diff --git a/sql/field.h b/sql/field.h
index e4a1e2b12c4..18633f8f1fe 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -832,6 +832,13 @@ public:
   uint32	flags;
   uint16        field_index;            // field number in fields array
   uchar		null_bit;		// Bit used to test null bit
+
+  /*
+    Caches the value of whether statistics are available for a field
+    This is reset for each query in THD::init()
+  */
+  uint8 stats_available;
+
   /**
      If true, this field was created in create_tmp_field_from_item from a NULL
      value. This means that the type of the field is just a guess, and the type
@@ -876,11 +883,6 @@ public:
     STATISTICS_FOR_NDV_AVAILABLE
   };
 
-  /*
-    Caches the value of whether statistics are available for a field or not.
-  */
-  uint stats_available;
-
   /* 
     This is additional data provided for any computed(virtual) field,
     default function or check constraint.
@@ -1919,10 +1921,9 @@ public:
   /* Mark field in read map. Updates also virtual fields */
   void register_field_in_read_map();
 
-  bool is_first_component_of_key(KEY *key);
   void statistics_available_via_keys();
   void statistics_available_via_stat_tables();
-  bool is_statistics_available();
+  bool is_range_statistics_available();
   bool is_ndv_available();
   bool is_ndv_available_via_stat_tables();
   bool is_ndv_available_via_keys();
diff --git a/sql/item.cc b/sql/item.cc
index c72f488c81d..71b73ae314f 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -9367,7 +9367,7 @@ bool Item_field::predicate_selectivity_checker(void *arg)
   {
     same_field_arg->item= this;
     same_field_arg->is_stats_available=
-                     field->is_statistics_available() ||
+                     field->is_range_statistics_available() ||
                      (item_equal && item_equal->is_statistics_available());
     return false;
   }
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 711f1b939a9..4fe1d78af45 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -7244,7 +7244,7 @@ bool Item_equal::is_statistics_available()
   while (it++)
   {
     Field *field= it.get_curr_field();
-    if (field->is_statistics_available())
+    if (field->is_range_statistics_available())
       return true;
   }
   return false;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 3553ee78c5b..c04eda05c57 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -29478,6 +29478,10 @@ void unpack_to_base_table_fields(TABLE *table)
 
 bool is_range_predicate(Item *item, Item *value)
 {
+  /*
+    calling real_item() here so that if the item is a REF_ITEM
+    then we would get the item field it is referring to
+  */
   Item *field= item->real_item();
   if (field->type() == Item::FIELD_ITEM && !field->const_item() &&
       (!value || !value->is_expensive()))
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 71d472feaba..5a93df13cf8 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3158,10 +3158,7 @@ static void dump_stats_from_share_to_table(TABLE *table)
   Field **field_ptr= table_share->field;
   Field **table_field_ptr= table->field;
   for ( ; *field_ptr; field_ptr++, table_field_ptr++)
-  {
     (*table_field_ptr)->read_stats= (*field_ptr)->read_stats;
-    (*table_field_ptr)->stats_available= (*field_ptr)->stats_available;
-  }
   table->stats_is_read= true;
 }
author	Varun Gupta <varun.gupta@mariadb.com>	2021-01-09 00:36:13 +0530
committer	Varun Gupta <varun.gupta@mariadb.com>	2021-01-11 13:23:06 +0530
commit	7220e3719d312e194c0ce00c0c66a770360835fa (patch)
tree	b5ad8ec9efebd4ca671e5f54acee4a4e1a17fc39
parent	8b7ad4e69750dcf6663203de5f4a1a082bf383be (diff)
download	mariadb-git-7220e3719d312e194c0ce00c0c66a770360835fa.tar.gz