summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVarun Gupta <varun.gupta@mariadb.com>2021-01-09 00:36:13 +0530
committerVarun Gupta <varun.gupta@mariadb.com>2021-01-11 13:23:06 +0530
commit7220e3719d312e194c0ce00c0c66a770360835fa (patch)
treeb5ad8ec9efebd4ca671e5f54acee4a4e1a17fc39
parent8b7ad4e69750dcf6663203de5f4a1a082bf383be (diff)
downloadmariadb-git-7220e3719d312e194c0ce00c0c66a770360835fa.tar.gz
Addressing review
-rw-r--r--mysql-test/include/join_cardinality.inc7
-rw-r--r--mysql-test/main/join_cardinality.result36
-rw-r--r--mysql-test/main/join_cardinality.test12
-rw-r--r--sql/field.cc76
-rw-r--r--sql/field.h15
-rw-r--r--sql/item.cc2
-rw-r--r--sql/item_cmpfunc.cc2
-rw-r--r--sql/sql_select.cc4
-rw-r--r--sql/sql_statistics.cc3
9 files changed, 87 insertions, 70 deletions
diff --git a/mysql-test/include/join_cardinality.inc b/mysql-test/include/join_cardinality.inc
index 1ab6edb75cb..8d258a95324 100644
--- a/mysql-test/include/join_cardinality.inc
+++ b/mysql-test/include/join_cardinality.inc
@@ -43,7 +43,7 @@ FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
--echo #
--echo # The cardinality here would be not accurate because we don't have
---echo # selectivity for the predicate t1.b=10
+--echo # selectivity for the predicate t1.c=10
--echo #
EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
@@ -129,3 +129,8 @@ EXPLAIN SELECT * from t1
WHERE t1.a = (select t2.a from t2 where t1.b=t2.b limit 1);
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t2.b > 10 limit 1) AND abs(t1.b)= 11;
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
index edcbf2f04e7..b74ede7939c 100644
--- a/mysql-test/main/join_cardinality.result
+++ b/mysql-test/main/join_cardinality.result
@@ -1,10 +1,10 @@
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_trace=1;
+SET optimizer_switch='rowid_filter=off';
CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a));
INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
CREATE TABLE t2(a INT, b INT, key(b));
INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
-SET @save_optimizer_switch=@@optimizer_switch;
-SET optimizer_trace=1;
-SET optimizer_switch='rowid_filter=off';
#
# RUNNING the join cardinality tests when statistics are
# available only from indexes
@@ -98,7 +98,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
]
#
# The cardinality here would be not accurate because we don't have
-# selectivity for the predicate t1.b=10
+# selectivity for the predicate t1.c=10
#
EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
id select_type table type possible_keys key key_len ref rows Extra
@@ -264,6 +264,18 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
false,
false
]
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t2.b > 10 limit 1) AND abs(t1.b)= 11;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ref a a 5 const 1 Using where
+2 SUBQUERY t2 range b b 5 NULL 95 Using index condition
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true,
+ false
+]
DROP TABLE t1,t2;
CREATE TABLE t1(a INT, b INT, c INT);
INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
@@ -362,7 +374,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
]
#
# The cardinality here would be not accurate because we don't have
-# selectivity for the predicate t1.b=10
+# selectivity for the predicate t1.c=10
#
EXPLAIN SELECT * FROM t1 WHERE t1.c = 10;
id select_type table type possible_keys key key_len ref rows Extra
@@ -528,6 +540,18 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
false,
false
]
+EXPLAIN SELECT * from t1
+WHERE t1.a = (select t2.a from t2 where t2.b > 10 limit 1) AND abs(t1.b)= 11;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
+2 SUBQUERY t2 ALL NULL NULL NULL NULL 100 Using where
+SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
+JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
+[
+ true,
+ false
+]
DROP TABLE t1,t2;
#
# Combination with statistics from stat tables and
@@ -820,7 +844,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
DROP TABLE t1,t2;
DROP VIEW v1;
#
-# TESTS to get ndv from rec_per_key
+# TESTS to get Number of distinct values(ndv) from rec_per_key
#
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_10;
diff --git a/mysql-test/main/join_cardinality.test b/mysql-test/main/join_cardinality.test
index 696025bdecf..d0f8b74e99b 100644
--- a/mysql-test/main/join_cardinality.test
+++ b/mysql-test/main/join_cardinality.test
@@ -1,16 +1,16 @@
--source include/have_sequence.inc
+# Default settings for all following tests
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_trace=1;
+SET optimizer_switch='rowid_filter=off';
+
CREATE TABLE t1(a INT, b INT, c INT, KEY(b), KEY(a));
INSERT INTO t1 SELECT seq, seq, seq from seq_1_to_100;
CREATE TABLE t2(a INT, b INT, key(b));
INSERT INTO t2 SELECT seq, seq from seq_1_to_100;
-
-SET @save_optimizer_switch=@@optimizer_switch;
-SET optimizer_trace=1;
-SET optimizer_switch='rowid_filter=off';
-
--echo #
--echo # RUNNING the join cardinality tests when statistics are
--echo # available only from indexes
@@ -224,7 +224,7 @@ DROP VIEW v1;
--echo #
---echo # TESTS to get ndv from rec_per_key
+--echo # TESTS to get Number of distinct values(ndv) from rec_per_key
--echo #
CREATE TABLE t1 (a INT, b INT);
diff --git a/sql/field.cc b/sql/field.cc
index 5e4d29045a3..51938bab9b3 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -11369,7 +11369,7 @@ void Field::print_key_value_binary(String *out, const uchar* key, uint32 length)
If the column is the first component of a key, then statistics
for the column are available from the range optimizer.
Sets the bit in Field::stats_table
- a) NDV is available
+ a) Number of distinct values(NDV) is available
b) Statistics are available for the non-const argument of a
range predicate
*/
@@ -11381,15 +11381,12 @@ void Field::statistics_available_via_keys()
while ((key= it++) != key_map::Iterator::BITMAP_END)
{
KEY *keyinfo= table->key_info + key;
- if (keyinfo->usable_key_parts == 1 &&
- field_index + 1 == keyinfo->key_part->fieldnr)
+ DBUG_ASSERT(field_index + 1 == keyinfo->key_part->fieldnr);
+ stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+ if (keyinfo->actual_rec_per_key(0))
{
- stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
- if (keyinfo->actual_rec_per_key(0))
- {
- stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
- return;
- }
+ stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+ return;
}
}
}
@@ -11402,11 +11399,12 @@ void Field::statistics_available_via_keys()
void Field::statistics_available_via_stat_tables()
{
- if (!(read_stats && !read_stats->no_stat_values_provided()))
- return;
- stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
- if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))
- stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+ if (read_stats && !read_stats->no_stat_values_provided())
+ {
+ stats_available|= (1 << STATISTICS_FOR_RANGE_PREDICATES_AVAILABLE);
+ if (!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY))
+ stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+ }
}
@@ -11414,14 +11412,19 @@ void Field::statistics_available_via_stat_tables()
@brief
Check if statistics for a column are available via indexes or stat tables
+ @details
+ This function checks if there are statistics for a column via indexes
+ or stat tables. Also if the member Field::stats_available
+ has not been updated then update it
+
@retval
TRUE : statistics available for the column
FALSE : OTHERWISE
*/
-bool Field::is_statistics_available()
+bool Field::is_range_statistics_available()
{
- if (!(stats_available & (1 << STATISTICS_CACHED)))
+ if (!stats_available)
{
statistics_available_via_keys();
statistics_available_via_stat_tables();
@@ -11433,7 +11436,8 @@ bool Field::is_statistics_available()
/*
@brief
- Check if ndv for a column are available via indexes or stat tables
+ Check if number of distinct values (NDV) for a column are available
+ via indexes or stat tables
@retval
TRUE : ndv available for the column
@@ -11442,12 +11446,11 @@ bool Field::is_statistics_available()
bool Field::is_ndv_available()
{
- if (!(stats_available & (1 << STATISTICS_CACHED)))
+ if (!stats_available)
{
- bool res= is_ndv_available_via_keys() ||
- is_ndv_available_via_stat_tables();
stats_available|= (1 << STATISTICS_CACHED);
- return res;
+ return is_ndv_available_via_keys() ||
+ is_ndv_available_via_stat_tables();
}
return (stats_available & (1 << STATISTICS_FOR_NDV_AVAILABLE));
}
@@ -11469,7 +11472,7 @@ bool Field::is_ndv_available_via_keys()
while ((key= it++) != key_map::Iterator::BITMAP_END)
{
KEY *keyinfo= table->key_info + key;
- if (is_first_component_of_key(keyinfo) && keyinfo->actual_rec_per_key(0))
+ if (keyinfo->actual_rec_per_key(0))
{
stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
return true;
@@ -11490,30 +11493,13 @@ bool Field::is_ndv_available_via_keys()
bool Field::is_ndv_available_via_stat_tables()
{
- if (!(read_stats && !read_stats->no_stat_values_provided() &&
+ if ((read_stats && !read_stats->no_stat_values_provided() &&
!read_stats->is_null(COLUMN_STAT_AVG_FREQUENCY)))
- return false;
- stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
- return true;
-}
-
-
-/*
- @brief
- Checks if a field is the first component of a given key
-
- @param
- key given key
-
- @retval
- TRUE : field is the first component of the given key
- FALSE : otherwise
-*/
-
-bool Field::is_first_component_of_key(KEY *key)
-{
- DBUG_ASSERT(key->usable_key_parts >= 1);
- return field_index + 1 == key->key_part->fieldnr;
+ {
+ stats_available|= (1 << STATISTICS_FOR_NDV_AVAILABLE);
+ return true;
+ }
+ return false;
}
diff --git a/sql/field.h b/sql/field.h
index e4a1e2b12c4..18633f8f1fe 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -832,6 +832,13 @@ public:
uint32 flags;
uint16 field_index; // field number in fields array
uchar null_bit; // Bit used to test null bit
+
+ /*
+ Caches the value of whether statistics are available for a field
+ This is reset for each query in THD::init()
+ */
+ uint8 stats_available;
+
/**
If true, this field was created in create_tmp_field_from_item from a NULL
value. This means that the type of the field is just a guess, and the type
@@ -876,11 +883,6 @@ public:
STATISTICS_FOR_NDV_AVAILABLE
};
- /*
- Caches the value of whether statistics are available for a field or not.
- */
- uint stats_available;
-
/*
This is additional data provided for any computed(virtual) field,
default function or check constraint.
@@ -1919,10 +1921,9 @@ public:
/* Mark field in read map. Updates also virtual fields */
void register_field_in_read_map();
- bool is_first_component_of_key(KEY *key);
void statistics_available_via_keys();
void statistics_available_via_stat_tables();
- bool is_statistics_available();
+ bool is_range_statistics_available();
bool is_ndv_available();
bool is_ndv_available_via_stat_tables();
bool is_ndv_available_via_keys();
diff --git a/sql/item.cc b/sql/item.cc
index c72f488c81d..71b73ae314f 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -9367,7 +9367,7 @@ bool Item_field::predicate_selectivity_checker(void *arg)
{
same_field_arg->item= this;
same_field_arg->is_stats_available=
- field->is_statistics_available() ||
+ field->is_range_statistics_available() ||
(item_equal && item_equal->is_statistics_available());
return false;
}
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 711f1b939a9..4fe1d78af45 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -7244,7 +7244,7 @@ bool Item_equal::is_statistics_available()
while (it++)
{
Field *field= it.get_curr_field();
- if (field->is_statistics_available())
+ if (field->is_range_statistics_available())
return true;
}
return false;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 3553ee78c5b..c04eda05c57 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -29478,6 +29478,10 @@ void unpack_to_base_table_fields(TABLE *table)
bool is_range_predicate(Item *item, Item *value)
{
+ /*
+ calling real_item() here so that if the item is a REF_ITEM
+ then we would get the item field it is referring to
+ */
Item *field= item->real_item();
if (field->type() == Item::FIELD_ITEM && !field->const_item() &&
(!value || !value->is_expensive()))
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 71d472feaba..5a93df13cf8 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3158,10 +3158,7 @@ static void dump_stats_from_share_to_table(TABLE *table)
Field **field_ptr= table_share->field;
Field **table_field_ptr= table->field;
for ( ; *field_ptr; field_ptr++, table_field_ptr++)
- {
(*table_field_ptr)->read_stats= (*field_ptr)->read_stats;
- (*table_field_ptr)->stats_available= (*field_ptr)->stats_available;
- }
table->stats_is_read= true;
}