summaryrefslogtreecommitdiff
path: root/sql/item.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/item.cc')
-rw-r--r--sql/item.cc213
1 files changed, 213 insertions, 0 deletions
diff --git a/sql/item.cc b/sql/item.cc
index 52274380cd1..f80db684bb3 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -7491,6 +7491,122 @@ Item *Item::build_pushable_cond(THD *thd,
}
+/*
+ @brief
+ Check if selectivity estimates are accurate for a conditional formula
+
+ @details
+ This function checks whether this item belongs to a certain class of
+ condition for which we can calculate an accurate selectivity estimate.
+
+ The definition of the class of condition is recursive.
+ 1. Simple formula
+ a. Formula in the form of range predicates:
+
+ The predicate would be of type:
+ col op const
+ where op can be
+
+ op:
+ | >
+ | >=
+ | <
+ | <=
+ | =
+ | <>
+ The other supported cases are the
+ [NOT] IN predicate,
+ IS [NOT] NULL predicate and
+ LIKE predicate.
+ The predicate should have only one non-constant argument and
+ this argument will be a reference to a column that is used either
+ as the first component of an index or statistics are available via
+ statistical tables.
+
+ b. Equalities:
+ For an equality to have accurate selectivity estimates,
+ the number of distinct values for each column in the equality
+ needs to be known.
+ Eg: t1.a= t2.a is transformed to MULTIPLE_EQUAL(t1.a, t2.a)
+ For this case we need to make sure we know number of distinct
+ values for t1.a and t2.a
+
+ The number of distinct values for a column can be known by
+ 1) from indexes via rec_per_key
+ 2) from statistical tables via avg_frequency.
+
+ 2. AND / OR formula over formulas defined in section 1 of the definition.
+
+ a) AND Formula
+ For an AND formula the check for accurate selectivity estimates depends
+ on whether or not the AND formula is at the top level.
+
+ i) Top level
+ For an AND formula at the top level, we need to check if
+ accurate estimates are available for all the predicates
+ inside an AND formula.
+ If this is true then accurate selectivity estimates are available
+ for the AND formula.
+
+ Eg: t1.a > 10 and t2.a < 5
+
+ if we have accurate selectivity estimates
+ for t1.a > 10 and t2.a < 5 via indexes or statistical tables,
+ then selectivity estimates for this AND formula are accurate
+
+ ii) Non-top level
+ For all the predicates inside an AND formula
+ accurate selectivity estimates are needed
+ and each predicate needs to be resolved by one
+ column (table column). If this condition is satisfied then
+ accurate selectivity estimates are available for the AND formula.
+ Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0)
+
+ b) OR Formula
+
+ For an OR predicate, we need to make sure that the
+ whole OR predicate can be resolved by one column
+ directly or indirectly (that is via multiple equalities).
+ If this is possible then for the resolved column we need to have
+ statistics either from the first component of an index or
+ via statistical tables.
+
+ Eg: t1.a=t2.b and (t2.b > 5 or t1.a < 0);
+
+ In the end for all fields we may have selectivity from an index or
+ statistical tables.
+
+ @notes
+ The implementation of this function uses the 'walk' method to traverse
+ the tree of this item with predicate_selectivity_checker() as the
+ call-back parameter of the method.
+
+
+ @retval
+ TRUE selectivity estimates are accurate
+ FALSE OTHERWISE
+*/
+
+bool Item::with_accurate_selectivity_estimation()
+{
+  const bool is_and_formula=
+    type() == Item::COND_ITEM &&
+    ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC;
+
+  if (!is_and_formula)
+  {
+    /*
+      Anything that is not an AND formula is checked as a whole: a
+      non-zero result of walk() means the estimates are not accurate.
+    */
+    SAME_FIELD whole_cond_arg= {NULL, false};
+    return !walk(&Item::predicate_selectivity_checker, 0, &whole_cond_arg);
+  }
+
+  /*
+    AND formula: every conjunct is checked separately, each one with its
+    own freshly initialized SAME_FIELD, and the first conjunct that fails
+    the check makes the whole formula inaccurate.
+  */
+  List_iterator<Item> conjunct_it(*((Item_cond*) this)->argument_list());
+  while (Item *conjunct= conjunct_it++)
+  {
+    SAME_FIELD conjunct_arg= {NULL, false};
+    if (conjunct->walk(&Item::predicate_selectivity_checker, 0,
+                       &conjunct_arg))
+      return false;
+  }
+  return true;
+}
+
+
static
Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel)
{
@@ -9217,6 +9333,103 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel)
}
+/*
+ @brief
+ Checks if a formula of a condition contains the same column
+
+ @details
+ In the function we try to check if a formula of a condition depends
+ (directly or indirectly through equalities inferred from the
+ conjuncted multiple equalities) only on one column.
+
+ Eg:
+ WHERE clause is:
+ t1.a=t2.b and (t1.a > 5 or t2.b < 1);
+
+ the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of
+ equalities to conclude that it depends on one column.
+
+ This is used mostly for OR conjuncts where we need to make sure
+ that the entire OR conjunct contains only one column, so that we may
+ get accurate estimates.
+
+ @retval
+ TRUE : the formula does not depend on one column, or statistics
+ are not available for the first column found
+ FALSE : OTHERWISE
+*/
+
+bool Item_field::dep_on_one_column(void *arg)
+{
+  SAME_FIELD *ctx= (SAME_FIELD*) arg;
+
+  if (!ctx->item)
+  {
+    /*
+      First field item met during the walk: the caller cannot know in
+      advance which column the predicate depends on, so this item is
+      recorded as the reference column and later field items are
+      compared against it. Statistics may come from the field itself or
+      from the multiple equality the field belongs to.
+    */
+    ctx->item= this;
+    bool stats_found= field->is_statistics_available_for_range_predicates();
+    if (!stats_found && item_equal)
+      stats_found= item_equal->is_statistics_available_for_range_predicates();
+    ctx->is_stats_available= stats_found;
+    return !ctx->is_stats_available;
+  }
+
+  /* A reference column is already recorded: is this the very same field? */
+  DBUG_ASSERT(ctx->item->real_item()->type() == Item::FIELD_ITEM);
+  Item_field *reference_field= (Item_field*) ctx->item->real_item();
+  if (reference_field->field == field)
+    return false;
+
+  /*
+    A different field is acceptable only when the reference column has a
+    multiple equality and it is the same one this field belongs to.
+  */
+  return !ctx->item->get_item_equal() ||
+         ctx->item->get_item_equal() != item_equal;
+}
+
+
+bool Item_direct_view_ref::dep_on_one_column(void *arg)
+{
+  DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM);
+  Item_field *underlying= (Item_field*) real_item();
+  SAME_FIELD *ctx= (SAME_FIELD*) arg;
+
+  if (!ctx->item)
+  {
+    /*
+      First column reference met during the walk: the caller cannot know
+      in advance which column the predicate depends on, so this item is
+      recorded as the reference column and later field items are
+      compared against it. Statistics may come from the underlying field
+      or from the multiple equality this reference belongs to.
+    */
+    ctx->item= this;
+    bool stats_found=
+      underlying->field->is_statistics_available_for_range_predicates();
+    if (!stats_found && item_equal)
+      stats_found= item_equal->is_statistics_available_for_range_predicates();
+    ctx->is_stats_available= stats_found;
+    return !ctx->is_stats_available;
+  }
+
+  /* A reference column is already recorded: is it the same field? */
+  DBUG_ASSERT(ctx->item->real_item()->type() == Item::FIELD_ITEM);
+  Item_field *reference_field= (Item_field*) ctx->item->real_item();
+  if (reference_field->field == underlying->field)
+    return false;
+
+  /*
+    A different field is acceptable only when the reference column has a
+    multiple equality and it is the same one this reference belongs to.
+  */
+  return !ctx->item->get_item_equal() ||
+         ctx->item->get_item_equal() != item_equal;
+}
+
+
bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map)
{
table_map used= used_tables();