diff options
author | Varun Gupta <varun.gupta@mariadb.com> | 2021-02-05 23:54:28 +0530 |
---|---|---|
committer | Varun Gupta <varun.gupta@mariadb.com> | 2021-02-06 12:09:36 +0530 |
commit | 9b8c3ee965e5a736bc972a749bdd2e38ecacfad5 (patch) | |
tree | 742621383c7b7841d63927cfd8c4fbe2217592c3 | |
parent | f91641b4fa56ddddfb250b2cffbc464cd544e95f (diff) | |
download | mariadb-git-10.5-mdev8306.tar.gz |
Using recurisive descent instead of walk to check if the join cardinality estimates are precise10.5-mdev8306
-rw-r--r-- | mysql-test/main/join_cardinality.result | 2 | ||||
-rw-r--r-- | sql/item.cc | 198 | ||||
-rw-r--r-- | sql/item.h | 9 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 103 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 16 | ||||
-rw-r--r-- | sql/sql_select.cc | 7 |
6 files changed, 184 insertions, 151 deletions
diff --git a/mysql-test/main/join_cardinality.result b/mysql-test/main/join_cardinality.result index e41438e1a3e..e98de9c087b 100644 --- a/mysql-test/main/join_cardinality.result +++ b/mysql-test/main/join_cardinality.result @@ -733,7 +733,7 @@ SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE; JSON_DETAILED(JSON_EXTRACT(trace, '$**.cardinality_accurate')) [ - true + false ] EXPLAIN SELECT * from t1 WHERE t1.a=t1.b AND (abs(t1.a),t1.b) IN ((1,1), (2,2), (3,3)); diff --git a/sql/item.cc b/sql/item.cc index fe6c81b7b95..ae144402b64 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -7729,7 +7729,6 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones) this argument will be a reference to a column that is used either as the first component of an index or statistics are available via statistical tables. - b. Equalities: For an equality to have accurate selectivity estimates, the number of distinct values for each column in the equality @@ -7741,7 +7740,6 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones) The number of distinct values for a column can be known by 1) from indexes via rec_per_key 2) from statistical tables via avg_frequency. - 2. (recursive step) AND / OR formula over formulas defined in section 1 of the definition. @@ -7784,11 +7782,14 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones) In the end for all fields we may have selectivity from an index or statistical tables. + 3. (optional additional non-recursive step) + Conjunction of formulas described in sections 1 and 2. @notes - The implementation for this function use the 'walk' method to traverse - the tree of this item with predicate_selectivity_checker() as the - call-back parameter of the method. + The implementation for this function use the recursive descent to walk + over the condition tree. A call-back function + predicate_selectivity_checker() is passed as a parameter to this function. + propagate_equal_fields() is called before this function is called so Item_field::item_equal and Item_direct_view_ref::item_equal is set. @@ -7797,27 +7798,55 @@ Item *Item::build_pushable_condition(THD *thd, bool no_top_clones) FALSE OTHERWISE */ -bool Item::with_accurate_selectivity_estimation() +bool Item::with_accurate_selectivity_estimation(Predicate_checker checker, + void *arg, bool top_level) { - /* - For the below test one could use a virtual function but that would - take a lot of space for other item as there will be entires in the vtable - */ - if (type() == Item::COND_ITEM && - ((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) + if (type() == Item::COND_ITEM) { - List_iterator<Item> li(*((Item_cond*) this)->argument_list()); + Item_cond_and *and_cond= + (((Item_cond*) this)->functype() == Item_func::COND_AND_FUNC) ? + ((Item_cond_and*) this) : 0; + + List<Item> *arg_list= ((Item_cond*) this)->argument_list(); + List_iterator<Item> li(*arg_list); Item *item; - while ((item= li++)) + + if (and_cond) + { + uint counter=0; + while ((item=li++)) + { + SAME_FIELD arg_same_field= {NULL, FALSE}; + if (item->with_accurate_selectivity_estimation(checker, + (top_level ? + (void *)&arg_same_field : + arg), + top_level)) + { + counter++; + } + } + + if (counter == arg_list->elements) + return true; + } + else { - SAME_FIELD arg= {NULL, false}; - if (item->walk(&Item::predicate_selectivity_checker, 0, &arg)) - return false; + uint counter=0; + while ((item=li++)) + { + if (item->with_accurate_selectivity_estimation(checker, arg, FALSE)) + { + counter++; + } + } + if (counter == arg_list->elements) + return true; } - return true; + return false; } - SAME_FIELD arg= {NULL, false}; - return !walk(&Item::predicate_selectivity_checker, 0, &arg); + + return !((this->*checker) (arg)); } @@ -9551,81 +9580,6 @@ Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) } -/* - @brief - Checks if a formula of a condition contains the same column - - @details - In the function we try to check if a formula of a condition depends - (directly or indirectly through equalities inferred from the - conjuncted multiple equalities) only on one column. - - Eg: - WHERE clause is: - t1.a=t2.b and (t1.a > 5 or t2.b < 1); - - the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of - equalities to conclude that it depends on one column. - - This is used mostly for OR conjuncts where we need to make sure - that the entire OR conjunct contains only one column, so that we may - get accurate estimates. - An example with top level OR conjunct would be: - WHERE A=1 or A between 100 and 200 or A > 1000 - - @retval - TRUE : the formula does not depend on one column - FALSE : OTHERWISE -*/ - -bool Item_field::predicate_selectivity_checker(void *arg) -{ - SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; - - /* - The same_field_arg is passed as a parameter because when we start walking - over the condition tree we don't know which column the predicate will be - dependent on. So as soon as we encounter a leaf of the condition tree - which is a field item, we set the SAME_FIELD::item to the found - field item and then compare the rest of the fields in the predicate with - the field item. - */ - - if (same_field_arg->item == NULL) - { - same_field_arg->item= this; - same_field_arg->is_stats_available= - field->is_range_statistics_available() || - (item_equal && item_equal->is_statistics_available()); - return false; - } - - DBUG_ASSERT(same_field_arg->item->real_item()->type() == Item::FIELD_ITEM); - /* Found the same field while traversing the condition tree */ - if (((Item_field*)same_field_arg->item->real_item())->field == field) - return false; - - if (!same_field_arg->item->get_item_equal()) - return true; - - return !(same_field_arg->item->get_item_equal() == item_equal); -} - - -bool Item_direct_view_ref::predicate_selectivity_checker(void *arg) -{ - SAME_FIELD *same_field_arg= (SAME_FIELD*)arg; - if (same_field_arg->item == real_item()) - { - DBUG_ASSERT(real_item()->type() == Item::FIELD_ITEM); - same_field_arg->item= this; - same_field_arg->is_stats_available|= - (item_equal && item_equal->is_statistics_available()); - } - return false; -} - - bool Item_direct_view_ref::excl_dep_on_tables(table_map tab_map, bool multi_eq_checked) { @@ -11043,3 +10997,57 @@ bool Item::is_non_const_field_item() return true; return false; } + + +/* + @brief + Checks if a formula of a condition contains the same column + + @details + In the function we try to check if a formula of a condition depends + (directly or indirectly through equalities inferred from the + conjuncted multiple equalities) only on one column. + + Eg: + WHERE clause is: + t1.a=t2.b and (t1.a > 5 or t2.b < 1); + + the predicate (t1.a > 5 or t2.b < 1) can be resolved with the help of + equalities to conclude that it depends on one column. + + This is used mostly for OR conjuncts where we need to make sure + that the entire OR conjunct contains only one column, so that we may + get accurate estimates. + An example with top level OR conjunct would be: + WHERE A=1 or A between 100 and 200 or A > 1000 + + @retval + TRUE : the formula depends on one column + FALSE : OTHERWISE +*/ + +bool Item::is_resolved_by_same_column(void *arg) +{ + SAME_FIELD *same_field_arg= (SAME_FIELD*) arg; + if (same_field_arg->item == NULL) + { + Field *field= ((Item_field *)real_item())->field; + same_field_arg->item= this; + same_field_arg->is_stats_available= + (field->is_range_statistics_available() || + (get_item_equal() && + get_item_equal()->is_statistics_available())); + return true; + } + + Item *item= same_field_arg->item; + + if (((Item_field*)item->real_item())->field == + ((Item_field*)real_item())->field) + return true; + + if (!item->get_item_equal()) + return false; + + return (item->get_item_equal() == get_item_equal()); +} diff --git a/sql/item.h b/sql/item.h index 61ce5d230e0..2e2993ccd2f 100644 --- a/sql/item.h +++ b/sql/item.h @@ -624,6 +624,9 @@ typedef bool (Item::*Item_analyzer) (uchar **argp); typedef Item* (Item::*Item_transformer) (THD *thd, uchar *arg); typedef void (*Cond_traverser) (const Item *item, void *arg); typedef bool (Item::*Pushdown_checker) (uchar *arg); +typedef bool (Item::*Predicate_checker) (void *arg); + + struct st_cond_statistic; @@ -1999,7 +2002,8 @@ public: virtual bool find_selective_predicates_list_processor(void *arg) { return 0; } - bool with_accurate_selectivity_estimation(); + bool with_accurate_selectivity_estimation(Predicate_checker checker, + void *arg, bool top_level); /* @brief @@ -2546,6 +2550,7 @@ public: */ bool pushable_equality_checker_for_subquery(uchar *arg); bool is_non_const_field_item(); + bool is_resolved_by_same_column(void *arg); }; MEM_ROOT *get_thd_memroot(THD *thd); @@ -3654,7 +3659,6 @@ public: return field->table->pos_in_table_list->outer_join; } bool check_index_dependence(void *arg); - bool predicate_selectivity_checker(void *arg); Item *replace_with_nest_items(THD *thd, uchar *arg); friend class Item_default_value; friend class Item_insert_value; @@ -6029,7 +6033,6 @@ public: Item *field_transformer_for_having_pushdown(THD *thd, uchar *arg) { return this; } Item *remove_item_direct_ref() { return this; } - bool predicate_selectivity_checker(void *arg); }; diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 317ac38a10f..ad326cbdd78 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2112,18 +2112,16 @@ bool Item_func_between::count_sargable_conds(void *arg) return 0; } -bool Item_func_between::predicate_selectivity_checker(void *arg) -{ - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) - return true; +bool Item_func_between::predicate_selectivity_checker2(void *arg) +{ if (arguments()[0]->real_item()->type() == Item::FIELD_ITEM) { - if (is_range_predicate(args[0], args[1]) && - is_range_predicate(args[0], args[2])) - return false; - return true; + if (!(is_range_predicate(args[0], args[1]) && + is_range_predicate(args[0], args[2]))) + return true; + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } for (uint i= 1 ; i < arg_count ; i++) @@ -2132,6 +2130,9 @@ bool Item_func_between::predicate_selectivity_checker(void *arg) { if (!is_range_predicate(args[i], args[0])) return true; + if (!((args[i]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available))) + return true; } } return false; @@ -4321,13 +4322,25 @@ bool Item_func_in::count_sargable_conds(void *arg) } -bool Item_func_in::predicate_selectivity_checker(void *arg) +bool Item_func_in::predicate_selectivity_checker2(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!all_items_are_consts(args + 1, arg_count - 1)) return true; - if (all_items_are_consts(args + 1, arg_count - 1)) - return false; + + if (args[0]->type() == Item::ROW_ITEM) + { + /* + Currently not walking inside the row item to check if all the fields are + same or not, that is not a case that users would be using, so just + disallowing such predicates and returning that selectivity for the + predicate is not available + */ + return true; + } + + if (args[0]->is_non_const_field_item()) + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); return true; } @@ -5560,15 +5573,12 @@ bool Item_func_null_predicate::count_sargable_conds(void *arg) } -bool Item_func_null_predicate::predicate_selectivity_checker(void *arg) +bool Item_func_null_predicate::predicate_selectivity_checker2(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!args[0]->is_non_const_field_item()) return true; - - if (args[0]->is_non_const_field_item()) - return false; - return true; + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } @@ -5654,16 +5664,16 @@ bool Item_bool_func2::count_sargable_conds(void *arg) } -bool Item_bool_func2::predicate_selectivity_checker(void *arg) +bool Item_bool_func2::predicate_selectivity_checker2(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!(is_range_predicate(args[0], args[1]) || + is_range_predicate(args[1], args[0]))) return true; - if (is_range_predicate(args[0], args[1]) || - is_range_predicate(args[1], args[0])) - return false; - return true; + Item *item= args[0]->is_non_const_field_item() ? args[0] : args[1]; + + return !(item->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } @@ -5772,15 +5782,12 @@ SEL_TREE *Item_func_like::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) } -bool Item_func_like::predicate_selectivity_checker(void *arg) +bool Item_func_like::predicate_selectivity_checker2(void *arg) { - SAME_FIELD *field_arg= (SAME_FIELD*)arg; - if (!field_arg->is_stats_available) + if (!with_sargable_pattern()) return true; - - if (with_sargable_pattern()) - return false; - return true; + return !(args[0]->is_resolved_by_same_column(arg) && + ((SAME_FIELD*)arg)->is_stats_available); } @@ -7236,7 +7243,7 @@ bool Item_equal::count_sargable_conds(void *arg) } -bool Item_equal::predicate_selectivity_checker(void *arg) +bool Item_equal::predicate_selectivity_checker2(void *arg) { /* For equality conditions like tbl1.col = tbl2.col @@ -7254,17 +7261,29 @@ bool Item_equal::predicate_selectivity_checker(void *arg) field->is_ndv_available())) return true; } - return false; + } + else + { + while (it++) + { + Field *field= it.get_curr_field(); + field->is_range_statistics_available(); + if (!field->is_ndv_available()) + return true; + } } + it.rewind(); + Item *item= (it++); + SAME_FIELD *same_field_arg= (SAME_FIELD *) arg; - while (it++) + if (same_field_arg->item == NULL) { - Field *field= it.get_curr_field(); - if (!(field->is_ndv_available())) - return true; + item->is_resolved_by_same_column(arg); + return false; } - return false; + + return !item->is_resolved_by_same_column(arg); } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 12cecac8ec2..cbb9175eb48 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -227,7 +227,6 @@ public: bool fix_length_and_dec() { decimals=0; max_length=1; return FALSE; } uint decimal_precision() const { return 1; } bool need_parentheses_in_default() { return true; } - bool predicate_selectivity_checker(void *arg) { return TRUE; } }; @@ -420,7 +419,8 @@ public: COND *remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level); bool count_sargable_conds(void *arg); - bool predicate_selectivity_checker(void *arg); + bool predicate_selectivity_checker2(void *arg); + /* Specifies which result type the function uses to compare its arguments. This method is used in equal field propagation. @@ -939,7 +939,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); - bool predicate_selectivity_checker(void *arg); + bool predicate_selectivity_checker2(void *arg); void add_key_fields(JOIN *join, KEY_FIELD **key_fields, uint *and_level, table_map usable_tables, SARGABLE_PARAM **sargables); @@ -2477,7 +2477,7 @@ public: bool find_not_null_fields(table_map allowed); void fix_after_pullout(st_select_lex *new_parent, Item **ref, bool merge); bool count_sargable_conds(void *arg); - bool predicate_selectivity_checker(void *arg); + bool predicate_selectivity_checker2(void *arg); Item *get_copy(THD *thd) { return get_item_copy<Item_func_in>(thd, this); } Item *build_clone(THD *thd) @@ -2575,7 +2575,7 @@ public: return FALSE; } bool count_sargable_conds(void *arg); - bool predicate_selectivity_checker(void *arg); + bool predicate_selectivity_checker2(void *arg); }; @@ -2826,7 +2826,7 @@ public: Item *get_copy(THD *thd) { return get_item_copy<Item_func_like>(thd, this); } - bool predicate_selectivity_checker(void *arg); + bool predicate_selectivity_checker2(void *arg); }; @@ -3236,7 +3236,7 @@ public: uint elements_count() { return equal_items.elements; } friend class Item_equal_fields_iterator; bool count_sargable_conds(void *arg); - bool predicate_selectivity_checker(void *arg); + bool predicate_selectivity_checker2(void *arg); bool is_statistics_available(); Item *multiple_equality_transformer(THD *thd, uchar *arg); friend class Item_equal_iterator<List_iterator_fast,Item>; @@ -3389,7 +3389,6 @@ public: SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_and>(thd, this); } - bool predicate_selectivity_checker(void *arg) { return FALSE; } }; inline bool is_cond_and(Item *item) @@ -3414,7 +3413,6 @@ public: Item *neg_transformer(THD *thd); Item *get_copy(THD *thd) { return get_item_copy<Item_cond_or>(thd, this); } - bool predicate_selectivity_checker(void *arg) { return FALSE; } }; class Item_func_dyncol_check :public Item_bool_func diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 896277da6cc..a4464299659 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8486,7 +8486,12 @@ choose_plan(JOIN *join, table_map join_tables) if (join->conds && (join->sort_nest_possible || thd->trace_started())) { bool cardinality_accurate; - cardinality_accurate= join->conds->with_accurate_selectivity_estimation(); + SAME_FIELD arg= {NULL, FALSE}; + cardinality_accurate= + join->conds-> + with_accurate_selectivity_estimation(&Item::predicate_selectivity_checker, + &arg, TRUE); + wrapper.add("cardinality_accurate", cardinality_accurate); if (!cardinality_accurate) |