diff options
Diffstat (limited to 'sql/item.cc')
-rw-r--r-- | sql/item.cc | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/sql/item.cc b/sql/item.cc index 52274380cd1..f80db684bb3 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -7491,6 +7491,122 @@ Item *Item::build_pushable_cond(THD *thd, } +/* + @brief + Check if selectivity estimates are accurate for a conditional formula + + @details + This function checks whether this item belongs to a certain class of + condition for which we can calculate an accurate selectivity estimate. + + The definition of the class of condition is recursive. + 1. Simple formula + a. Formula in the form of range predicates: + + The predicate would be of type: + col op const + where op can be + + op: + | > + | >= + | < + | <= + | = + | <> + The other supported cases are the + [NOT] IN predicate, + [NOT] NULL predicate and + LIKE predicate. + The predicate should have only one non-constant argument and + this argument will be a reference to a column that is either used + as the first component of an index or has statistics available via + statistical tables. + + b. Equalities: + For an equality to have accurate selectivity estimates, + the number of distinct values for each column in the equality + needs to be known. + Eg: t1.a= t2.a is transformed to MULTIPLE_EQUAL(t1.a, t2.a) + For this case we need to make sure we know the number of distinct + values for t1.a and t2.a + + The number of distinct values for a column can be known + 1) from indexes via rec_per_key + 2) from statistical tables via avg_frequency. + + 2. AND / OR formula over formulas defined in section 1 of the definition. + + a) AND Formula + For an AND formula the check for accurate selectivity estimates depends + on whether or not the AND formula is at the top level. + + i) Top level + For an AND formula at the top level, we need to check if + accurate estimates are available for all the predicates + inside the AND formula. + If this is true then accurate selectivity estimates are available + for the AND formula. 
+ + Eg: t1.a > 10 and t2.a < 5 + + if we have accurate selectivity estimates + for t1.a > 10 and t2.a < 5 via indexes or statistical tables, + then selectivity estimates for this AND formula are accurate + + ii) Non-top level + For all the predicates inside an AND formula + accurate selectivity estimates are needed + and each predicate needs to be resolved by one + column (table column). If this scenario is satisfied then + accurate selectivity estimates are available for the AND formula. + Eg: t1.a = t2.a AND ( (t1.a > 5 AND t2.a < 10) OR t1.a <= 0) + + b) OR Formula + + For an OR predicate, we need to make sure that the + whole OR predicate can be resolved by one column + directly or indirectly (that is via multiple equalities). + If this is possible then for the resolved column we need to have + statistics either from the first component of an index or + via statistical tables. + + Eg: t1.a=t2.b and (t2.b > 5 or t1.a < 0); + + In the end for all fields we may have selectivity from an index or + statistical tables. + + @notes + The implementation of this function uses the 'walk' method to traverse + the tree of this item with predicate_selectivity_checker() as the + call-back parameter of the method. 
+ + + @retval + TRUE selectivity estimates are accurate + FALSE OTHERWISE +*/ + +bool Item::with_accurate_selectivity_estimation() +{ + if (type() == Item::COND_ITEM && + ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) + { /* this item is an AND: every conjunct is checked on its own */ + List_iterator<Item> li(*((Item_cond*) this)->argument_list()); + Item *item; + while ((item= li++)) + { + SAME_FIELD arg= {NULL, false}; /* fresh checker state for each conjunct */ + if (item->walk(&Item::predicate_selectivity_checker, 0, &arg)) + return false; /* walk() returned true for this conjunct: not accurate */ + } + return true; /* walk() returned false for every conjunct */ + } + SAME_FIELD arg= {NULL, false}; /* not an AND: one checker state for the whole item */ + return !walk(&Item::predicate_selectivity_checker, 0, &arg); +} + + static Item *get_field_item_for_having(THD *thd, Item *item, st_select_lex *sel) { @@ -9217,6 +9333,103 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) } +/* + @brief + Checks if a formula of a condition depends on only one column + + @details + In the function we try to check if a formula of a condition depends + (directly or indirectly through equalities inferred from the + conjuncted multiple equalities) only on one column. + + Eg: + WHERE clause is: + t1.a=t2.b and (t1.a > 5 or t2.b < 1); + + the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of + equalities to conclude that it depends on one column. + + This is used mostly for OR conjuncts where we need to make sure + that the entire OR conjunct contains only one column, so that we may + get accurate estimates. + + @retval + TRUE : the formula does not depend on one column, or no statistics are available for the first column found + FALSE : OTHERWISE +*/ + +bool Item_field::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we do not know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set SAME_FIELD::item to the found field item, + record whether statistics are available for it, and then compare the + rest of the fields in the predicate with that field item. 
+ */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; /* TRUE when no statistics are available for this first field */ + } + + /* A field was already recorded: accept this one only if it is the same field or has the same item_equal */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field) + return false; /* same underlying field as the recorded one */ + + if (!same_field_arg->item->get_item_equal()) + return true; /* recorded item has no item_equal to compare against */ + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + /* Same check as Item_field::dep_on_one_column(), applied to the field item returned by real_item() */ + +bool Item_direct_view_ref::dep_on_one_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; + DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); + Item_field *field_item= (Item_field*)real_item(); + + /* + The same_field_arg is passed as a parameter because when we start walking + over the condition tree we do not know which column the predicate will be + dependent on. So as soon as we encounter a leaf of the condition tree + which is a field item, we set SAME_FIELD::item to the found item, + record whether statistics are available for it, and then compare the + rest of the fields in the predicate with that item. 
+ */ + + if (same_field_arg->item == NULL) + { + same_field_arg->item= this; + same_field_arg->is_stats_available= + field_item->field->is_statistics_available_for_range_predicates() || + (item_equal && + item_equal->is_statistics_available_for_range_predicates()); + return !same_field_arg->is_stats_available; /* TRUE when no statistics are available for this first field */ + } + + /* A field was already recorded: accept this one only if it is the same field or has the same item_equal */ + DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); + if (((Item_field*)same_field_arg->item->real_item())->field == field_item->field) + return false; /* same underlying field as the recorded one */ + + if (!same_field_arg->item->get_item_equal()) + return true; /* recorded item has no item_equal to compare against */ + + return !(same_field_arg->item->get_item_equal() == item_equal); +} + + bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map) { table_map used= used_tables(); |