summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVarun Gupta <varun.gupta@mariadb.com>2021-02-05 23:54:28 +0530
committerVarun Gupta <varun.gupta@mariadb.com>2021-02-06 12:09:36 +0530
commit9b8c3ee965e5a736bc972a749bdd2e38ecacfad5 (patch)
tree742621383c7b7841d63927cfd8c4fbe2217592c3
parentf91641b4fa56ddddfb250b2cffbc464cd544e95f (diff)
downloadmariadb-git-10.5-mdev8306.tar.gz
Using recurisive descent instead of walk to check if the join cardinality estimates are precise10.5-mdev8306
-rw-r--r--mysql-test/main/join_cardinality.result2
-rw-r--r--sql/item.cc198
-rw-r--r--sql/item.h9
-rw-r--r--sql/item_cmpfunc.cc103
-rw-r--r--sql/item_cmpfunc.h16
-rw-r--r--sql/sql_select.cc7
6 files changed, 184 insertions, 151 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result
index e41438e1a3e..e98de9c087b 100644
--- a/mysql-test/main/join_cardinality.result
+++ b/mysql-test/main/join_cardinality.result
@@ -733,7 +733,7 @@ SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate'))
[
- true
+ false
]
EXPLAIN SELECT * from t1
WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3));
diff --git a/sql/item.cc b/sql/item.cc
index fe6c81b7b95..ae144402b64 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -7729,7 +7729,6 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones)
this argument will be a reference to a column that is used either
as the first component of an index or statistics are available via
statistical tables.
-
b. Equalities:
For an equality to have accurate selectivity estimates,
the number of distinct values for each column in the equality
@@ -7741,7 +7740,6 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones)
The number of distinct values for a column can be known by
1) from indexes via rec_per_key
2) from statistical tables via avg_frequency.
-
2. (recursive step)
AND / OR formula over formulas defined in section 1 of the
definition.
@@ -7784,11 +7782,14 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones)
In the end for all fields we may have selectivity from an index or
statistical tables.
+ 3. (optional additional non-recursive step)
+ Conjunction of formulas described in sections 1 and 2.
@notes
- The implementation for this function use the 'walk' method to traverse
- the tree of this item with predicate_selectivity_checker() as the
- call-back parameter of the method.
+ The implementation for this function use the recursive descent to walk
+ over the condition tree. A call-back function
+ predicate_selectivity_checker() is passed as a parameter to this function.
+
propagate_equal_fields() is called before this function is called so
Item_field::item_equal and Item_direct_view_ref::item_equal is set.
@@ -7797,27 +7798,55 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones)
FALSE OTHERWISE
*/
-bool Item::with_accurate_selectivity_estimation()
+bool Item::with_accurate_selectivity_estimation(Predicate_checker checker,
+ void *arg, bool top_level)
{
- /*
- For the below test one could use a virtual function but that would
- take a lot of space for other item as there will be entires in the vtable
- */
- if (type() == Item::COND_ITEM &&
- ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC)
+ if (type() == Item::COND_ITEM)
{
- List_iterator<Item> li(*((Item_cond*) this)->argument_list());
+ Item_cond_and *and_cond=
+ (((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) ?
+ ((Item_cond_and*) this) : 0;
+
+ List<Item> *arg_list= ((Item_cond*) this)->argument_list();
+ List_iterator<Item> li(*arg_list);
Item *item;
- while ((item= li++))
+
+ if (and_cond)
+ {
+ uint counter=0;
+ while ((item=li++))
+ {
+ SAME_FIELD arg_same_field= {NULL, FALSE};
+ if (item->with_accurate_selectivity_estimation(checker,
+ (top_level ?
+ (void *)&arg_same_field :
+ arg),
+ top_level))
+ {
+ counter++;
+ }
+ }
+
+ if (counter == arg_list->elements)
+ return true;
+ }
+ else
{
- SAME_FIELD arg= {NULL, false};
- if (item->walk(&Item::predicate_selectivity_checker, 0, &arg))
- return false;
+ uint counter=0;
+ while ((item=li++))
+ {
+ if (item->with_accurate_selectivity_estimation(checker, arg, FALSE))
+ {
+ counter++;
+ }
+ }
+ if (counter == arg_list->elements)
+ return true;
}
- return true;
+ return false;
}
- SAME_FIELD arg= {NULL, false};
- return !walk(&Item::predicate_selectivity_checker, 0, &arg);
+
+ return !((this->*checker) (arg));
}
@@ -9551,81 +9580,6 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel)
}
-/*
- @brief
- Checks if a formula of a condition contains the same column
-
- @details
- In the function we try to check if a formula of a condition depends
- (directly or indirectly through equalities inferred from the
- conjuncted multiple equalities) only on one column.
-
- Eg:
- WHERE clause is:
- t1.a=t2.b and (t1.a > 5 or t2.b < 1);
-
- the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of
- equalities to conclude that it depends on one column.
-
- This is used mostly for OR conjuncts where we need to make sure
- that the entire OR conjunct contains only one column, so that we may
- get accurate estimates.
- An example with top level OR conjunct would be:
- WHERE A=1 or A between 100 and 200 or A > 1000
-
- @retval
- TRUE : the formula does not depend on one column
- FALSE : OTHERWISE
-*/
-
-bool Item_field::predicate_selectivity_checker(void *arg)
-{
- SAME_FIELD *same_field_arg= (SAME_FIELD*)arg;
-
- /*
- The same_field_arg is passed as a parameter because when we start walking
- over the condition tree we don't know which column the predicate will be
- dependent on. So as soon as we encounter a leaf of the condition tree
- which is a field item, we set the SAME_FIELD::item to the found
- field item and then compare the rest of the fields in the predicate with
- the field item.
- */
-
- if (same_field_arg->item == NULL)
- {
- same_field_arg->item= this;
- same_field_arg->is_stats_available=
- field->is_range_statistics_available() ||
- (item_equal && item_equal->is_statistics_available());
- return false;
- }
-
- DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM);
- /* Found the same field while traversing the condition tree */
- if (((Item_field*)same_field_arg->item->real_item())->field == field)
- return false;
-
- if (!same_field_arg->item->get_item_equal())
- return true;
-
- return !(same_field_arg->item->get_item_equal() == item_equal);
-}
-
-
-bool Item_direct_view_ref::predicate_selectivity_checker(void *arg)
-{
- SAME_FIELD *same_field_arg= (SAME_FIELD*)arg;
- if (same_field_arg->item == real_item())
- {
- DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM);
- same_field_arg->item= this;
- same_field_arg->is_stats_available|=
- (item_equal && item_equal->is_statistics_available());
- }
- return false;
-}
-
-
bool Item_direct_view_ref::excl_dep_on_tables(table_map tab_map,
bool multi_eq_checked)
{
@@ -11043,3 +10997,57 @@ bool Item::is_non_const_field_item()
return true;
return false;
}
+
+
+/*
+ @brief
+ Checks if a formula of a condition contains the same column
+
+ @details
+ In the function we try to check if a formula of a condition depends
+ (directly or indirectly through equalities inferred from the
+ conjuncted multiple equalities) only on one column.
+
+ Eg:
+ WHERE clause is:
+ t1.a=t2.b and (t1.a > 5 or t2.b < 1);
+
+ the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of
+ equalities to conclude that it depends on one column.
+
+ This is used mostly for OR conjuncts where we need to make sure
+ that the entire OR conjunct contains only one column, so that we may
+ get accurate estimates.
+ An example with top level OR conjunct would be:
+ WHERE A=1 or A between 100 and 200 or A > 1000
+
+ @retval
+ TRUE : the formula depends on one column
+ FALSE : OTHERWISE
+*/
+
+bool Item::is_resolved_by_same_column(void *arg)
+{
+ SAME_FIELD *same_field_arg= (SAME_FIELD*) arg;
+ if (same_field_arg->item == NULL)
+ {
+ Field *field= ((Item_field *)real_item())->field;
+ same_field_arg->item= this;
+ same_field_arg->is_stats_available=
+ (field->is_range_statistics_available() ||
+ (get_item_equal() &&
+ get_item_equal()->is_statistics_available()));
+ return true;
+ }
+
+ Item *item= same_field_arg->item;
+
+ if (((Item_field*)item->real_item())->field ==
+ ((Item_field*)real_item())->field)
+ return true;
+
+ if (!item->get_item_equal())
+ return false;
+
+ return (item->get_item_equal() == get_item_equal());
+}
diff --git a/sql/item.h b/sql/item.h
index 61ce5d230e0..2e2993ccd2f 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -624,6 +624,9 @@ typedef bool (Item::*Item_analyzer) (uchar **argp);
typedef Item* (Item::*Item_transformer) (THD *thd, uchar *arg);
typedef void (*Cond_traverser) (const Item *item, void *arg);
typedef bool (Item::*Pushdown_checker) (uchar *arg);
+typedef bool (Item::*Predicate_checker) (void *arg);
+
+
struct st_cond_statistic;
@@ -1999,7 +2002,8 @@ public:
virtual bool find_selective_predicates_list_processor(void *arg) { return 0; }
- bool with_accurate_selectivity_estimation();
+ bool with_accurate_selectivity_estimation(Predicate_checker checker,
+ void *arg, bool top_level);
/*
@brief
@@ -2546,6 +2550,7 @@ public:
*/
bool pushable_equality_checker_for_subquery(uchar *arg);
bool is_non_const_field_item();
+ bool is_resolved_by_same_column(void *arg);
};
MEM_ROOT *get_thd_memroot(THD *thd);
@@ -3654,7 +3659,6 @@ public:
return field->table->pos_in_table_list->outer_join;
}
bool check_index_dependence(void *arg);
- bool predicate_selectivity_checker(void *arg);
Item *replace_with_nest_items(THD *thd, uchar *arg);
friend class Item_default_value;
friend class Item_insert_value;
@@ -6029,7 +6033,6 @@ public:
Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg)
{ return this; }
Item *remove_item_direct_ref() { return this; }
- bool predicate_selectivity_checker(void *arg);
};
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 317ac38a10f..ad326cbdd78 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -2112,18 +2112,16 @@ bool Item_func_between::count_sargable_conds(void *arg)
return 0;
}
-bool Item_func_between::predicate_selectivity_checker(void *arg)
-{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
- return true;
+bool Item_func_between::predicate_selectivity_checker2(void *arg)
+{
if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
{
- if (is_range_predicate(args[0], args[1]) &&
- is_range_predicate(args[0], args[2]))
- return false;
- return true;
+ if (!(is_range_predicate(args[0], args[1]) &&
+ is_range_predicate(args[0], args[2])))
+ return true;
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
for (uint i= 1 ; i < arg_count ; i++)
@@ -2132,6 +2130,9 @@ bool Item_func_between::predicate_selectivity_checker(void *arg)
{
if (!is_range_predicate(args[i], args[0]))
return true;
+ if (!((args[i]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available)))
+ return true;
}
}
return false;
@@ -4321,13 +4322,25 @@ bool Item_func_in::count_sargable_conds(void *arg)
}
-bool Item_func_in::predicate_selectivity_checker(void *arg)
+bool Item_func_in::predicate_selectivity_checker2(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!all_items_are_consts(args + 1, arg_count - 1))
return true;
- if (all_items_are_consts(args + 1, arg_count - 1))
- return false;
+
+ if (args[0]->type() == Item::ROW_ITEM)
+ {
+ /*
+ Currently not walking inside the row item to check if all the fields are
+ same or not, that is not a case that users would be using, so just
+ disallowing such predicates and returning that selectivity for the
+ predicate is not available
+ */
+ return true;
+ }
+
+ if (args[0]->is_non_const_field_item())
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
return true;
}
@@ -5560,15 +5573,12 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg)
}
-bool Item_func_null_predicate::predicate_selectivity_checker(void *arg)
+bool Item_func_null_predicate::predicate_selectivity_checker2(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!args[0]->is_non_const_field_item())
return true;
-
- if (args[0]->is_non_const_field_item())
- return false;
- return true;
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
@@ -5654,16 +5664,16 @@ bool Item_bool_func2::count_sargable_conds(void *arg)
}
-bool Item_bool_func2::predicate_selectivity_checker(void *arg)
+bool Item_bool_func2::predicate_selectivity_checker2(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!(is_range_predicate(args[0], args[1]) ||
+ is_range_predicate(args[1], args[0])))
return true;
- if (is_range_predicate(args[0], args[1]) ||
- is_range_predicate(args[1], args[0]))
- return false;
- return true;
+ Item *item= args[0]->is_non_const_field_item() ? args[0] : args[1];
+
+ return !(item->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
@@ -5772,15 +5782,12 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr)
}
-bool Item_func_like::predicate_selectivity_checker(void *arg)
+bool Item_func_like::predicate_selectivity_checker2(void *arg)
{
- SAME_FIELD *field_arg= (SAME_FIELD*)arg;
- if (!field_arg->is_stats_available)
+ if (!with_sargable_pattern())
return true;
-
- if (with_sargable_pattern())
- return false;
- return true;
+ return !(args[0]->is_resolved_by_same_column(arg) &&
+ ((SAME_FIELD*)arg)->is_stats_available);
}
@@ -7236,7 +7243,7 @@ bool Item_equal::count_sargable_conds(void *arg)
}
-bool Item_equal::predicate_selectivity_checker(void *arg)
+bool Item_equal::predicate_selectivity_checker2(void *arg)
{
/*
For equality conditions like tbl1.col = tbl2.col
@@ -7254,17 +7261,29 @@ bool Item_equal::predicate_selectivity_checker(void *arg)
field->is_ndv_available()))
return true;
}
- return false;
+ }
+ else
+ {
+ while (it++)
+ {
+ Field *field= it.get_curr_field();
+ field->is_range_statistics_available();
+ if (!field->is_ndv_available())
+ return true;
+ }
}
+ it.rewind();
+ Item *item= (it++);
+ SAME_FIELD *same_field_arg= (SAME_FIELD *) arg;
- while (it++)
+ if (same_field_arg->item == NULL)
{
- Field *field= it.get_curr_field();
- if (!(field->is_ndv_available()))
- return true;
+ item->is_resolved_by_same_column(arg);
+ return false;
}
- return false;
+
+ return !item->is_resolved_by_same_column(arg);
}
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 12cecac8ec2..cbb9175eb48 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -227,7 +227,6 @@ public:
bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; }
uint decimal_precision() const { return 1; }
bool need_parentheses_in_default() { return true; }
- bool predicate_selectivity_checker(void *arg) { return TRUE; }
};
@@ -420,7 +419,8 @@ public:
COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value,
bool top_level);
bool count_sargable_conds(void *arg);
- bool predicate_selectivity_checker(void *arg);
+ bool predicate_selectivity_checker2(void *arg);
+
/*
Specifies which result type the function uses to compare its arguments.
This method is used in equal field propagation.
@@ -939,7 +939,7 @@ public:
bool find_not_null_fields(table_map allowed);
void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge);
bool count_sargable_conds(void *arg);
- bool predicate_selectivity_checker(void *arg);
+ bool predicate_selectivity_checker2(void *arg);
void add_key_fields(JOIN *join, KEY_FIELD **key_fields,
uint *and_level, table_map usable_tables,
SARGABLE_PARAM **sargables);
@@ -2477,7 +2477,7 @@ public:
bool find_not_null_fields(table_map allowed);
void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge);
bool count_sargable_conds(void *arg);
- bool predicate_selectivity_checker(void *arg);
+ bool predicate_selectivity_checker2(void *arg);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_in>(thd, this); }
Item *build_clone(THD *thd)
@@ -2575,7 +2575,7 @@ public:
return FALSE;
}
bool count_sargable_conds(void *arg);
- bool predicate_selectivity_checker(void *arg);
+ bool predicate_selectivity_checker2(void *arg);
};
@@ -2826,7 +2826,7 @@ public:
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_like>(thd, this); }
- bool predicate_selectivity_checker(void *arg);
+ bool predicate_selectivity_checker2(void *arg);
};
@@ -3236,7 +3236,7 @@ public:
uint elements_count() { return equal_items.elements; }
friend class Item_equal_fields_iterator;
bool count_sargable_conds(void *arg);
- bool predicate_selectivity_checker(void *arg);
+ bool predicate_selectivity_checker2(void *arg);
bool is_statistics_available();
Item *multiple_equality_transformer(THD *thd, uchar *arg);
friend class Item_equal_iterator<List_iterator_fast,Item>;
@@ -3389,7 +3389,6 @@ public:
SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_and>(thd, this); }
- bool predicate_selectivity_checker(void *arg) { return FALSE; }
};
inline bool is_cond_and(Item *item)
@@ -3414,7 +3413,6 @@ public:
Item *neg_transformer(THD *thd);
Item *get_copy(THD *thd)
{ return get_item_copy<Item_cond_or>(thd, this); }
- bool predicate_selectivity_checker(void *arg) { return FALSE; }
};
class Item_func_dyncol_check :public Item_bool_func
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 896277da6cc..a4464299659 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -8486,7 +8486,12 @@ choose_plan(JOIN *join, table_map join_tables)
if (join->conds && (join->sort_nest_possible || thd->trace_started()))
{
bool cardinality_accurate;
- cardinality_accurate= join->conds->with_accurate_selectivity_estimation();
+ SAME_FIELD arg= {NULL, FALSE};
+ cardinality_accurate=
+ join->conds->
+ with_accurate_selectivity_estimation(&Item::predicate_selectivity_checker,
+ &arg, TRUE);
+
wrapper.add("cardinality_accurate", cardinality_accurate);
if (!cardinality_accurate)