diff options
author | Sergey Petrunya <psergey@askmonty.org> | 2009-08-13 02:34:21 +0400 |
---|---|---|
committer | Sergey Petrunya <psergey@askmonty.org> | 2009-08-13 02:34:21 +0400 |
commit | 854bb82bd81afa6decc436b9ecf6af4954efebe5 (patch) | |
tree | fce26373e6bb62ae4ab4f3e1fc922d53145deb8b /sql | |
parent | 5ecad03df9eef6e4d68cc7767b684944384e084e (diff) | |
download | mariadb-git-854bb82bd81afa6decc436b9ecf6af4954efebe5.tar.gz |
MWL#17: Table elimination
Address review feedback:
- Change from Wave-based approach (a-la const table detection) to
building and walking functional dependency graph.
- Change from piggy-backing on ref-access code and KEYUSE structures
to using our own expression analyzer.
sql/item.cc:
MWL#17: Table elimination
- Move from C-ish Field_processor_info to C++ ish and generic Field_enumerator
sql/item.h:
MWL#17: Table elimination
- Move from C-ish Field_processor_info to C++ ish and generic Field_enumerator
sql/sql_bitmap.h:
MWL#17: Table elimination
- Backport of Table_map_iterator from 6.0
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item.cc | 26 | ||||
-rw-r--r-- | sql/item.h | 10 | ||||
-rw-r--r-- | sql/opt_table_elimination.cc | 1358 | ||||
-rw-r--r-- | sql/sql_bitmap.h | 33 |
4 files changed, 1113 insertions, 314 deletions
diff --git a/sql/item.cc b/sql/item.cc index a010b98cd40..9cf369a2670 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1918,30 +1918,8 @@ void Item_field::reset_field(Field *f) bool Item_field::check_column_usage_processor(uchar *arg) { - Field_processor_info* info=(Field_processor_info*)arg; - - if (field->table == info->table) - { - /* It is not ok to use columns that are not part of the key of interest: */ - if (!(field->part_of_key.is_set(info->keyno))) - return TRUE; - - /* Find which key part we're using and mark it in needed_key_parts */ - KEY *key= &field->table->key_info[info->keyno]; - for (uint part= 0; part < key->key_parts; part++) - { - if (field->field_index == key->key_part[part].field->field_index) - { - if (part == info->forbidden_part) - return TRUE; - info->needed_key_parts |= key_part_map(1) << part; - break; - } - } - return FALSE; - } - else - info->used_tables |= this->used_tables(); + Field_enumerator *fe= (Field_enumerator*)arg; + fe->see_field(field); return FALSE; } diff --git a/sql/item.h b/sql/item.h index 0e74c742d99..53ae8ab8826 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1017,7 +1017,7 @@ public: bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); }; -/* Data for Item::check_column_usage_processor */ +#if 0 typedef struct { TABLE *table; /* Table of interest */ @@ -1028,7 +1028,15 @@ typedef struct /* [Set by processor] Parts of index of interest that expression refers to */ uint needed_key_parts; } Field_processor_info; +#endif +/* Data for Item::check_column_usage_processor */ +class Field_enumerator +{ +public: + virtual void see_field(Field *field)= 0; + virtual ~Field_enumerator() {}; /* Shut up compiler warning */ +}; class sp_head; diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc index fc8bf1231ea..e5829b1eb96 100644 --- a/sql/opt_table_elimination.cc +++ b/sql/opt_table_elimination.cc @@ -13,6 +13,7 @@ #endif #include "mysql_priv.h" +#include "my_bit.h" #include "sql_select.h" /* @@ -35,137 +36,691 @@ * Items that became disused because they were in the ON expression of an eliminated outer join are notified by means of the Item tree walk which calls Item::mark_as_eliminated_processor for every item - - At the moment the only Item that cares is Item_subselect with its - Item_subselect::eliminated flag which is used by EXPLAIN code to - check if the subquery should be shown in EXPLAIN. + - At the moment the only Item that cares whether it was eliminated is + Item_subselect with its Item_subselect::eliminated flag which is used + by EXPLAIN code to check if the subquery should be shown in EXPLAIN. - Table elimination is redone on every PS re-execution. + Table elimination is redone on every PS re-execution. (TODO reasons?) */ +/* + A structure that represents a functional dependency of something over + something else. This can be one of: + + 1. A "tbl.field = expr" equality. The field depends on the expression. + + 2. An Item_equal(...) multi-equality. Each participating field depends on + every other participating field. (TODO???) + + 3. A UNIQUE_KEY(field1, field2, fieldN). The key depends on the fields that + it is composed of. + + 4. A table (which is within an outer join nest). Table depends on a unique + key (value of a unique key identifies a table record) + + 5. An outer join nest. It depends on all tables it contains. + +*/ + +class Func_dep : public Sql_alloc +{ +public: + enum { + FD_INVALID, + FD_EXPRESSION, + FD_FIELD, + FD_MULTI_EQUALITY, + FD_UNIQUE_KEY, + FD_TABLE, + FD_OUTER_JOIN + } type; + Func_dep *next; + bool bound; + Func_dep() : next(NULL), bound(FALSE) {} +}; + + +class Field_dep; +class Table_dep; +class Outer_join_dep; + +/* + An equality + - Depends on multiple fields (those in its expression), unknown_args is a + counter of unsatisfied dependencies. +*/ +class Equality_dep : public Func_dep +{ +public: + Field_dep *field; + Item *val; + + uint level; /* Used during condition analysis only */ + uint unknown_args; /* Number of yet unknown arguments */ +}; + + +/* + A field. + - Depends on table or equality + - Has expressions it participates as dependencies + + There is no counter, bound fields are in $list, not bound are not. +*/ +class Field_dep : public Func_dep +{ +public: + Field_dep(Table_dep *table_arg, Field *field_arg) : + table(table_arg), field(field_arg) + { + type= Func_dep::FD_FIELD; + } + /* Table we're from. It also has pointers to keys that we're part of */ + Table_dep *table; + Field *field; + + Field_dep *next_table_field; + uint bitmap_offset; /* Offset of our part of the bitmap */ +}; + + +/* + A unique key. + - Depends on all its components + - Has its table as dependency +*/ +class Key_dep: public Func_dep +{ +public: + Key_dep(Table_dep *table_arg, uint keyno_arg, uint n_parts_arg) : + table(table_arg), keyno(keyno_arg), n_missing_keyparts(n_parts_arg), + next_table_key(NULL) + { + type= Func_dep::FD_UNIQUE_KEY; + } + Table_dep *table; /* Table this key is from */ + uint keyno; // TODO do we care about this + uint n_missing_keyparts; + Key_dep *next_table_key; +}; + + +/* + A table. + - Depends on any of its unique keys + - Has its fields and embedding outer join as dependency. +*/ +class Table_dep : public Func_dep +{ +public: + Table_dep(TABLE *table_arg) : + table(table_arg), fields(NULL), keys(NULL), outer_join_dep(NULL) + { + type= Func_dep::FD_TABLE; + } + TABLE *table; + Field_dep *fields; /* Fields that belong to this table */ + Key_dep *keys; /* Unique keys */ + Outer_join_dep *outer_join_dep; +}; + + +/* + An outer join nest. + - Depends on all tables inside it. + - (And that's it). +*/ +class Outer_join_dep: public Func_dep +{ +public: + Outer_join_dep(TABLE_LIST *table_list_arg, table_map missing_tables_arg) : + table_list(table_list_arg), missing_tables(missing_tables_arg), + all_tables(missing_tables_arg), parent(NULL) + { + type= Func_dep::FD_OUTER_JOIN; + } + TABLE_LIST *table_list; + table_map missing_tables; + table_map all_tables; + Outer_join_dep *parent; +}; + + +/* TODO need this? */ +class Table_elimination +{ +public: + Table_elimination(JOIN *join_arg) : join(join_arg) + { + bzero(table_deps, sizeof(table_deps)); + } + + JOIN *join; + /* Array of equality dependencies */ + Equality_dep *equality_deps; + uint n_equality_deps; /* Number of elements in the array */ + + /* tablenr -> Table_dep* mapping. */ + Table_dep *table_deps[MAX_KEY]; + + /* Outer joins that are candidates for elimination */ + List<Outer_join_dep> oj_deps; + + /* Bitmap of how expressions depend on bits */ + MY_BITMAP expr_deps; +}; + + +static +void build_funcdeps_for_cond(Table_elimination *te, Equality_dep **fdeps, + uint *and_level, Item *cond, + table_map usable_tables); +static +void add_funcdep(Table_elimination *te, + Equality_dep **eq_dep, uint and_level, + Item_func *cond, Field *field, + bool eq_func, Item **value, + uint num_values, table_map usable_tables); +static +Equality_dep *merge_func_deps(Equality_dep *start, Equality_dep *new_fields, + Equality_dep *end, uint and_level); + +Field_dep *get_field_dep(Table_elimination *te, Field *field); +void eliminate_tables(JOIN *join); static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl); -static bool table_has_one_match(TABLE *table, table_map bound_tables, - bool *multiple_matches); -static uint -eliminate_tables_for_list(JOIN *join, TABLE **leaves_arr, - List<TABLE_LIST> *join_list, - bool its_outer_join, - table_map tables_in_list, - table_map tables_used_elsewhere, - bool *multiple_matches); -static bool -extra_keyuses_bind_all_keyparts(table_map bound_tables, TABLE *table, - KEYUSE *key_start, KEYUSE *key_end, - uint n_keyuses, table_map bound_parts); + +#ifndef DBUG_OFF +static void dbug_print_deps(Table_elimination *te); +#endif + +/*******************************************************************************************/ /* - Perform table elimination + Produce FUNC_DEP elements for the given item (i.e. condition) and add them + to fdeps array. SYNOPSIS - eliminate_tables() - join Join to work on - const_tbl_count INOUT Number of constant tables (this includes - eliminated tables) - const_tables INOUT Bitmap of constant tables + build_funcdeps_for_cond() + fdeps INOUT Put created FUNC_DEP structures here DESCRIPTION - This function is the entry point for table elimination. - The idea behind table elimination is that if we have an outer join: - - SELECT * FROM t1 LEFT JOIN - (t2 JOIN t3) ON t3.primary_key=t1.col AND - t4.primary_key=t2.col - such that + a - 1. columns of the inner tables are not used anywhere ouside the outer - join (not in WHERE, not in GROUP/ORDER BY clause, not in select list - etc etc), and - 2. inner side of the outer join is guaranteed to produce at most one - record combination for each record combination of outer tables. + SEE ALSO + add_key_fields() + +*/ +static +void build_funcdeps_for_cond(Table_elimination *te, + Equality_dep **fdeps, uint *and_level, Item *cond, + table_map usable_tables) +{ + if (cond->type() == Item_func::COND_ITEM) + { + List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list()); + Equality_dep *org_key_fields= *fdeps; - then the inner side of the outer join can be removed from the query. - This is because it will always produce one matching record (either a - real match or a NULL-complemented record combination), and since there - are no references to columns of the inner tables anywhere, it doesn't - matter which record combination it was. + /* AND/OR */ + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + Item *item; + while ((item=li++)) + { + build_funcdeps_for_cond(te, fdeps, and_level, item, usable_tables); + } + /* + TODO: inject here a "if we have {t.col=const AND t.col=smth_else}, then + remove the second part" logic. + */ + for (; org_key_fields != *fdeps ; org_key_fields++) + org_key_fields->level= *and_level; + } + else + { + (*and_level)++; + build_funcdeps_for_cond(te, fdeps, and_level, li++, usable_tables); + Item *item; + while ((item=li++)) + { + Equality_dep *start_key_fields= *fdeps; + (*and_level)++; + build_funcdeps_for_cond(te, fdeps, and_level, item, usable_tables); + *fdeps= merge_func_deps(org_key_fields, start_key_fields, *fdeps, + ++(*and_level)); + } + } + return; + } - This function primary handles checking #1. It collects a bitmap of - tables that are not used in select list/GROUP BY/ORDER BY/HAVING/etc and - thus can possibly be eliminated. - - SIDE EFFECTS - See the OVERVIEW section at the top of this file. + if (cond->type() != Item::FUNC_ITEM) + return; + Item_func *cond_func= (Item_func*) cond; + switch (cond_func->select_optimize()) { + case Item_func::OPTIMIZE_NONE: + break; + case Item_func::OPTIMIZE_KEY: + { + Item **values; + // BETWEEN, IN, NE + if (cond_func->key_item()->real_item()->type() == Item::FIELD_ITEM && + !(cond_func->used_tables() & OUTER_REF_TABLE_BIT)) + { + values= cond_func->arguments()+1; + if (cond_func->functype() == Item_func::NE_FUNC && + cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && + !(cond_func->arguments()[0]->used_tables() & OUTER_REF_TABLE_BIT)) + values--; + DBUG_ASSERT(cond_func->functype() != Item_func::IN_FUNC || + cond_func->argument_count() != 2); + add_funcdep(te, fdeps, *and_level, cond_func, + ((Item_field*)(cond_func->key_item()->real_item()))->field, + 0, values, + cond_func->argument_count()-1, + usable_tables); + } + if (cond_func->functype() == Item_func::BETWEEN) + { + values= cond_func->arguments(); + for (uint i= 1 ; i < cond_func->argument_count() ; i++) + { + Item_field *field_item; + if (cond_func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM + && + !(cond_func->arguments()[i]->used_tables() & OUTER_REF_TABLE_BIT)) + { + field_item= (Item_field *) (cond_func->arguments()[i]->real_item()); + add_funcdep(te, fdeps, *and_level, cond_func, + field_item->field, 0, values, 1, usable_tables); + } + } + } + break; + } + case Item_func::OPTIMIZE_OP: + { + bool equal_func=(cond_func->functype() == Item_func::EQ_FUNC || + cond_func->functype() == Item_func::EQUAL_FUNC); -*/ + if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + !(cond_func->arguments()[0]->used_tables() & OUTER_REF_TABLE_BIT)) + { + add_funcdep(te, fdeps, *and_level, cond_func, + ((Item_field*)(cond_func->arguments()[0])->real_item())->field, + equal_func, + cond_func->arguments()+1, 1, usable_tables); + } + if (cond_func->arguments()[1]->real_item()->type() == Item::FIELD_ITEM && + cond_func->functype() != Item_func::LIKE_FUNC && + !(cond_func->arguments()[1]->used_tables() & OUTER_REF_TABLE_BIT)) + { + add_funcdep(te, fdeps, *and_level, cond_func, + ((Item_field*)(cond_func->arguments()[1])->real_item())->field, + equal_func, + cond_func->arguments(),1,usable_tables); + } + break; + } + case Item_func::OPTIMIZE_NULL: + /* column_name IS [NOT] NULL */ + if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM && + !(cond_func->used_tables() & OUTER_REF_TABLE_BIT)) + { + Item *tmp=new Item_null; + if (unlikely(!tmp)) // Should never be true + return; + add_funcdep(te, fdeps, *and_level, cond_func, + ((Item_field*)(cond_func->arguments()[0])->real_item())->field, + cond_func->functype() == Item_func::ISNULL_FUNC, + &tmp, 1, usable_tables); + } + break; + case Item_func::OPTIMIZE_EQUAL: + Item_equal *item_equal= (Item_equal *) cond; + Item *const_item= item_equal->get_const(); + Item_equal_iterator it(*item_equal); + Item_field *item; + if (const_item) + { + /* + For each field field1 from item_equal consider the equality + field1=const_item as a condition allowing an index access of the table + with field1 by the keys value of field1. + */ + while ((item= it++)) + { + add_funcdep(te, fdeps, *and_level, cond_func, item->field, + TRUE, &const_item, 1, usable_tables); + } + } + else + { + /* + Consider all pairs of different fields included into item_equal. + For each of them (field1, field1) consider the equality + field1=field2 as a condition allowing an index access of the table + with field1 by the keys value of field2. + */ + Item_equal_iterator fi(*item_equal); + while ((item= fi++)) + { + Field *field= item->field; + while ((item= it++)) + { + if (!field->eq(item->field)) + { + add_funcdep(te, fdeps, *and_level, cond_func, field/*item*/, + TRUE, (Item **) &item, 1, usable_tables); + } + } + it.rewind(); + } + } + break; + } +} -void eliminate_tables(JOIN *join) -{ - Item *item; - table_map used_tables; - DBUG_ENTER("eliminate_tables"); +/* + Perform an OR operation on two (adjacent) FUNC_DEP arrays. + + SYNOPSIS + merge_func_deps() + + DESCRIPTION + + This function is invoked for two adjacent arrays of FUNC_DEP elements: + + $LEFT_PART $RIGHT_PART + +-----------------------+-----------------------+ + start new_fields end + + The goal is to produce an array which would correspnd to the combined - DBUG_ASSERT(join->eliminated_tables == 0); + $LEFT_PART OR $RIGHT_PART - /* If there are no outer joins, we have nothing to eliminate: */ - if (!join->outer_join) - DBUG_VOID_RETURN; + condition. This is achieved as follows: First, we apply distrubutive law: + + (fdep_A_1 AND fdep_A_2 AND ...) OR (fdep_B_1 AND fdep_B_2 AND ...) = - /* Find the tables that are referred to from WHERE/HAVING */ - used_tables= (join->conds? join->conds->used_tables() : 0) | - (join->having? join->having->used_tables() : 0); + = AND_ij (fdep_A_[i] OR fdep_B_[j]) - /* Add tables referred to from the select list */ - List_iterator<Item> it(join->fields_list); - while ((item= it++)) - used_tables |= item->used_tables(); - - /* Add tables referred to from ORDER BY and GROUP BY lists */ - ORDER *all_lists[]= { join->order, join->group_list}; - for (int i=0; i < 2; i++) + Then we walk over the obtained "fdep_A_[i] OR fdep_B_[j]" pairs, and + - Discard those that that have left and right part referring to different + columns. We can't infer anything useful from "col1=expr1 OR col2=expr2". + - When left and right parts refer to the same column, we check if they are + essentially the same. + = If they are the same, we keep one copy + "t.col=expr OR t.col=expr" -> "t.col=expr + = if they are different , then we discard both + "t.col=expr1 OR t.col=expr2" -> (nothing useful) + + (no per-table or for-index FUNC_DEPS exist yet at this phase). + + See also merge_key_fields(). + + RETURN + End of the result array +*/ + +static +Equality_dep *merge_func_deps(Equality_dep *start, Equality_dep *new_fields, + Equality_dep *end, uint and_level) +{ + if (start == new_fields) + return start; // Impossible or + if (new_fields == end) + return start; // No new fields, skip all + + Equality_dep *first_free=new_fields; + + for (; new_fields != end ; new_fields++) { - for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next) - used_tables |= (*(cur_list->item))->used_tables(); + for (Equality_dep *old=start ; old != first_free ; old++) + { + /* + TODO: does it make sense to attempt to merging multiple-equalities? + A: YES. + (a=b=c) OR (a=b=d) produce "a=b". + QQ: + What to use for merging? Trivial N*M algorithm or pre-sort and then + merge ordered sequences? + */ + if (old->field == new_fields->field) + { + if (!new_fields->val->const_item()) + { + /* + If the value matches, we can use the key reference. + If not, we keep it until we have examined all new values + */ + if (old->val->eq(new_fields->val, old->field->field->binary())) + { + old->level= and_level; + } + } + else if (old->val->eq_by_collation(new_fields->val, + old->field->field->binary(), + old->field->field->charset())) + { + old->level= and_level; + } + else + { + /* The expressions are different. */ + if (old == --first_free) // If last item + break; + *old= *first_free; // Remove old value + old--; // Retry this value + } + } + } } - - THD* thd= join->thd; - if (join->select_lex == &thd->lex->select_lex) + + /* + Ok, the results are within the [start, first_free) range, and the useful + elements have level==and_level. Now, lets remove all unusable elements: + */ + for (Equality_dep *old=start ; old != first_free ;) { - /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */ - used_tables |= thd->table_map_for_update; + if (old->level != and_level) + { // Not used in all levels + if (old == --first_free) + break; + *old= *first_free; // Remove old value + continue; + } + old++; + } + return first_free; +} - /* Multi-table UPDATE: don't eliminate tables referred from SET statement */ - if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI) + +/* + Add a funcdep for a given equality. +*/ + +static +void add_funcdep(Table_elimination *te, + Equality_dep **eq_dep, uint and_level, + Item_func *cond, Field *field, + bool eq_func, Item **value, + uint num_values, table_map usable_tables) +{ + // Field *field= item_field->field; + if (!(field->table->map & usable_tables)) + return; + + for (uint i=0; i<num_values; i++) + { + if ((value[i])->used_tables() & RAND_TABLE_BIT) + return; + } + + /* + Save the following cases: + Field op constant + Field LIKE constant where constant doesn't start with a wildcard + Field = field2 where field2 is in a different table + Field op formula + Field IS NULL + Field IS NOT NULL + Field BETWEEN ... + Field IN ... + */ + + /* + We can't always use indexes when comparing a string index to a + number. cmp_type() is checked to allow compare of dates to numbers. + eq_func is NEVER true when num_values > 1 + */ + if (!eq_func) + { + /* + Additional optimization: if we're processing "t.key BETWEEN c1 AND c1" + then proceed as if we were processing "t.key = c1". + */ + if ((cond->functype() != Item_func::BETWEEN) || + ((Item_func_between*) cond)->negated || + !value[0]->eq(value[1], field->binary())) + return; + eq_func= TRUE; + } + + if (field->result_type() == STRING_RESULT) + { + if ((*value)->result_type() != STRING_RESULT) { - List_iterator<Item> it2(thd->lex->value_list); - while ((item= it2++)) - used_tables |= item->used_tables(); + if (field->cmp_type() != (*value)->result_type()) + return; + } + else + { + /* + We can't use indexes if the effective collation + of the operation differ from the field collation. + */ + if (field->cmp_type() == STRING_RESULT && + ((Field_str*)field)->charset() != cond->compare_collation()) + return; + } + } + + DBUG_ASSERT(eq_func); + /* Store possible eq field */ + (*eq_dep)->type= Func_dep::FD_EXPRESSION; //psergey-todo; + (*eq_dep)->field= get_field_dep(te, field); + (*eq_dep)->val= *value; + (*eq_dep)->level= and_level; + (*eq_dep)++; +} + + +Table_dep *get_table_dep(Table_elimination *te, TABLE *table) +{ + Table_dep *tbl_dep= new Table_dep(table); + Key_dep **key_list= &(tbl_dep->keys); + + /* Add dependencies for unique keys */ + for (uint i=0; i < table->s->keys; i++) + { + KEY *key= table->key_info + i; + if ((key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME) + { + Key_dep *key_dep= new Key_dep(tbl_dep, i, key->key_parts); + *key_list= key_dep; + key_list= &(key_dep->next_table_key); } } + return te->table_deps[table->tablenr] = tbl_dep; +} + +/* + Given a field, get its dependency element: if it already exists, find it, + otherwise create it. +*/ + +Field_dep *get_field_dep(Table_elimination *te, Field *field) +{ + TABLE *table= field->table; + Table_dep *tbl_dep; + + if (!(tbl_dep= te->table_deps[table->tablenr])) + tbl_dep= get_table_dep(te, table); + + Field_dep **pfield= &(tbl_dep->fields); + while (*pfield && (*pfield)->field->field_index < field->field_index) + { + pfield= &((*pfield)->next_table_field); + } + if (*pfield && (*pfield)->field->field_index == field->field_index) + return *pfield; - table_map all_tables= join->all_tables_map(); - if (all_tables & ~used_tables) + Field_dep *new_field= new Field_dep(tbl_dep, field); + + new_field->next_table_field= *pfield; + *pfield= new_field; + return new_field; +} + + +Outer_join_dep *get_outer_join_dep(Table_elimination *te, + TABLE_LIST *outer_join, table_map deps_map) +{ + Outer_join_dep *oj_dep; + oj_dep= new Outer_join_dep(outer_join, deps_map); + + Table_map_iterator it(deps_map); + int idx; + while ((idx= it.next_bit()) != Table_map_iterator::BITMAP_END) { - /* There are some tables that we probably could eliminate. Try it. */ - TABLE *leaves_array[MAX_TABLES]; - bool multiple_matches= FALSE; - eliminate_tables_for_list(join, leaves_array, join->join_list, FALSE, - all_tables, used_tables, &multiple_matches); + Table_dep *table_dep; + if (!(table_dep= te->table_deps[idx])) + { + TABLE *table= NULL; + /* + Locate and create the table. The search isnt very efficient but + typically we won't get here as we process the ON expression first + and that will create the Table_dep + */ + for (uint i= 0; i < te->join->tables; i++) + { + if (te->join->join_tab[i].table->tablenr == (uint)idx) + { + table= te->join->join_tab[i].table; + break; + } + } + DBUG_ASSERT(table); + table_dep= get_table_dep(te, table); + } + + if (!table_dep->outer_join_dep) + table_dep->outer_join_dep= oj_dep; + else + { + Outer_join_dep *oj= table_dep->outer_join_dep; + while (oj->parent) + oj= oj->parent; + oj->parent=oj_dep; + } + } - DBUG_VOID_RETURN; + return oj_dep; } + /* Perform table elimination in a given join list SYNOPSIS - eliminate_tables_for_list() - join The join - leaves_arr OUT Store here an array of leaf (base) tables that - are descendants of the join_list, and increment - the pointer to point right above the array. + collect_funcdeps_for_join_list() + te Table elimination context. join_list Join list to work on - its_outer_join TRUE <=> join_list is an inner side of an outer - join - FALSE <=> otherwise (this is top-level join list) + its_outer_join TRUE <=> the join_list is an inner side of an + outer join + FALSE <=> otherwise (this is top-level join + list, simplify_joins flattens out all + other kinds of join lists) + tables_in_list Bitmap of tables embedded in the join_list. tables_used_elsewhere Bitmap of tables that are referred to from somewhere outside of the join list (e.g. @@ -182,20 +737,18 @@ void eliminate_tables(JOIN *join) Number of children left after elimination. 0 means everything was eliminated. */ + static uint -eliminate_tables_for_list(JOIN *join, TABLE **leaves_arr, - List<TABLE_LIST> *join_list, - bool its_outer_join, - table_map tables_in_list, - table_map tables_used_elsewhere, - bool *multiple_matches) +collect_funcdeps_for_join_list(Table_elimination *te, + List<TABLE_LIST> *join_list, + bool build_eq_deps, + table_map tables_used_elsewhere, + table_map *eliminable_tables, + Equality_dep **eq_dep) { TABLE_LIST *tbl; List_iterator<TABLE_LIST> it(*join_list); table_map tables_used_on_left= 0; - TABLE **cur_table= leaves_arr; - bool children_have_multiple_matches= FALSE; - uint remaining_children= 0; while ((tbl= it++)) { @@ -203,253 +756,390 @@ eliminate_tables_for_list(JOIN *join, TABLE **leaves_arr, { table_map outside_used_tables= tables_used_elsewhere | tables_used_on_left; - bool multiple_matches= FALSE; + bool eliminable; + table_map cur_map; if (tbl->nested_join) { /* This is "... LEFT JOIN (join_nest) ON cond" */ - uint n; - if (!(n= eliminate_tables_for_list(join, cur_table, - &tbl->nested_join->join_list, TRUE, - tbl->nested_join->used_tables, - outside_used_tables, - &multiple_matches))) - { - mark_as_eliminated(join, tbl); - } - else - remaining_children++; - tbl->nested_join->n_tables= n; + cur_map= tbl->nested_join->used_tables; + eliminable= !(cur_map & outside_used_tables); + if (eliminable) + *eliminable_tables |= cur_map; + collect_funcdeps_for_join_list(te, &tbl->nested_join->join_list, + eliminable || build_eq_deps, + outside_used_tables, + eliminable_tables, + eq_dep); } else { /* This is "... LEFT JOIN tbl ON cond" */ - if (!(tbl->table->map & outside_used_tables) && - table_has_one_match(tbl->table, join->all_tables_map(), - &multiple_matches)) - { - mark_as_eliminated(join, tbl); - } - else - remaining_children++; + cur_map= tbl->table->map; + eliminable= !(tbl->table->map & outside_used_tables); + *eliminable_tables |= cur_map; } - tables_used_on_left |= tbl->on_expr->used_tables(); - children_have_multiple_matches= children_have_multiple_matches || - multiple_matches; - } - else - { - DBUG_ASSERT(!tbl->nested_join); - remaining_children++; - } - - if (tbl->table) - *(cur_table++)= tbl->table; - } - *multiple_matches |= children_have_multiple_matches; - - /* Try eliminating the nest we're called for */ - if (its_outer_join && !children_have_multiple_matches && - !(tables_in_list & tables_used_elsewhere)) - { - table_map bound_tables= join->const_table_map | (join->all_tables_map() & - ~tables_in_list); - table_map old_bound_tables; - TABLE **leaves_end= cur_table; - /* - Do the same as const table search table: try to expand the set of bound - tables until it covers all tables in the join_list - */ - do - { - old_bound_tables= bound_tables; - for (cur_table= leaves_arr; cur_table != leaves_end; cur_table++) + if (eliminable || build_eq_deps) { - if (!((*cur_table)->map & join->eliminated_tables) && - table_has_one_match(*cur_table, bound_tables, multiple_matches)) - { - bound_tables |= (*cur_table)->map; - } + // build comp_cond from ON expression + uint and_level=0; + build_funcdeps_for_cond(te, eq_dep, &and_level, tbl->on_expr, + *eliminable_tables); } - } while (old_bound_tables != bound_tables); - - if (!(tables_in_list & ~bound_tables)) - { - /* - This join_list can be eliminated. Signal about this to the caller by - returning number of tables. - */ - remaining_children= 0; + + if (eliminable) + te->oj_deps.push_back(get_outer_join_dep(te, tbl, cur_map)); + + tables_used_on_left |= tbl->on_expr->used_tables(); } } - return remaining_children; + return 0; } - /* - Check if the table will produce at most one matching record + Analyze exising FUNC_DEP array and add elements for tables and uniq keys SYNOPSIS - table_has_one_match() - table The [base] table being checked - bound_tables Tables that should be considered bound. - multiple_matches OUT Set to TRUE when there is no way we could - find find a limitation that would give us one-match - property. - + DESCRIPTION - Check if table will produce at most one matching record for each record - combination of tables in bound_tables bitmap. - - The check is based on ref analysis data, KEYUSE structures. We're - handling two cases: - - 1. Table has a UNIQUE KEY(uk_col_1, ... uk_col_N), and for each uk_col_i - there is a KEYUSE that represents a limitation in form - - table.uk_col_i = func(bound_tables) (X) + Add FUNC_DEP elements - 2. Same as above but we also handle limitations in form - - table.uk_col_i = func(bound_tables, uk_col_j1, ... uk_col_j2) (XX) - - where values of uk_col_jN are known to be bound because for them we - have an equality of form (X) or (XX). - - RETURN - TRUE Yes, at most one match - FALSE No + RETURN + . */ -static bool table_has_one_match(TABLE *table, table_map bound_tables, - bool *multiple_matches) +class Field_dependency_setter : public Field_enumerator { - KEYUSE *keyuse= table->reginfo.join_tab->keyuse; - if (keyuse) +public: + Field_dependency_setter(Table_elimination *te_arg): te(te_arg) + {} + + void see_field(Field *field) { - while (keyuse->table == table) + Table_dep *tbl_dep; + if ((tbl_dep= te->table_deps[field->table->tablenr])) { - uint key= keyuse->key; - key_part_map bound_parts=0; - uint n_unusable=0; - bool ft_key= test(keyuse->keypart == FT_KEYPART); - KEY *keyinfo= table->key_info + key; - KEYUSE *key_start = keyuse; - - do /* For each keypart and each way to read it */ + for (Field_dep *field_dep= tbl_dep->fields; field_dep; + field_dep= field_dep->next_table_field) { - if (keyuse->type == KEYUSE_USABLE) + if (field->field_index == field_dep->field->field_index) { - if(!(keyuse->used_tables & ~bound_tables) && - !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)) - { - bound_parts |= keyuse->keypart_map; - } + uint offs= field_dep->bitmap_offset + expr_offset; + if (!bitmap_is_set(&te->expr_deps, offs)) + te->equality_deps[expr_offset].unknown_args++; + bitmap_set_bit(&te->expr_deps, offs); + return; } - else - n_unusable++; - keyuse++; - } while (keyuse->table == table && keyuse->key == key); - - if (ft_key || ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) - != HA_NOSAME)) - { - continue; } + /* We didn't find the field. Bump the dependency anyway */ + te->equality_deps[expr_offset].unknown_args++; + } + } + Table_elimination *te; + uint expr_offset; /* Offset of the expression we're processing */ +}; - if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts) || - extra_keyuses_bind_all_keyparts(bound_tables, table, key_start, - keyuse, n_unusable, bound_parts)) + +static +bool setup_equality_deps(Table_elimination *te, Func_dep **bound_deps_list) +{ + DBUG_ENTER("setup_equality_deps"); + + uint offset= 0; + for (Table_dep **tbl_dep=te->table_deps; + tbl_dep < te->table_deps + MAX_TABLES; + tbl_dep++) + { + if (*tbl_dep) + { + for (Field_dep *field_dep= (*tbl_dep)->fields; + field_dep; + field_dep= field_dep->next_table_field) { - return TRUE; + field_dep->bitmap_offset= offset; + offset += te->n_equality_deps; } } } - return FALSE; + + void *buf; + if (!(buf= current_thd->alloc(bitmap_buffer_size(offset))) || + bitmap_init(&te->expr_deps, (my_bitmap_map*)buf, offset, FALSE)) + { + DBUG_RETURN(TRUE); + } + bitmap_clear_all(&te->expr_deps); + + /* + Walk through all field=expr elements and collect all fields. + */ + Func_dep *bound_dep= NULL; + Field_dependency_setter deps_setter(te); + for (Equality_dep *eq_dep= te->equality_deps; + eq_dep < te->equality_deps + te->n_equality_deps; + eq_dep++) + { + deps_setter.expr_offset= eq_dep - te->equality_deps; + eq_dep->unknown_args= 0; + eq_dep->val->walk(&Item::check_column_usage_processor, FALSE, + (uchar*)&deps_setter); + if (!eq_dep->unknown_args) + { + eq_dep->next= bound_dep; + bound_dep= eq_dep; + eq_dep->bound= TRUE; + } + } + *bound_deps_list= bound_dep; + + DBUG_EXECUTE("test", dbug_print_deps(te); ); + DBUG_RETURN(FALSE); } /* - Check if KEYUSE elemements with unusable==TRUE bind all parts of the key + Perform table elimination SYNOPSIS - - extra_keyuses_bind_all_keyparts() - bound_tables Tables which can be considered constants - table Table we're examining - key_start Start of KEYUSE array with elements describing the key - of interest - key_end End of the array + 1 - n_keyuses Number of elements in the array that have unusable==TRUE - bound_parts Key parts whose values are known to be bound. + eliminate_tables() + join Join to work on + const_tbl_count INOUT Number of constant tables (this includes + eliminated tables) + const_tables INOUT Bitmap of constant tables DESCRIPTION - Check if unusable KEYUSE elements cause all parts of key to be bound. An - unusable keyuse element makes a keypart bound when it - represents the following: + This function is the entry point for table elimination. + The idea behind table elimination is that if we have an outer join: + + SELECT * FROM t1 LEFT JOIN + (t2 JOIN t3) ON t3.primary_key=t1.col AND + t4.primary_key=t2.col + such that - keyXpartY=func(bound_columns, preceding_tables) + 1. columns of the inner tables are not used anywhere ouside the outer + join (not in WHERE, not in GROUP/ORDER BY clause, not in select list + etc etc), and + 2. inner side of the outer join is guaranteed to produce at most one + record combination for each record combination of outer tables. + + then the inner side of the outer join can be removed from the query. + This is because it will always produce one matching record (either a + real match or a NULL-complemented record combination), and since there + are no references to columns of the inner tables anywhere, it doesn't + matter which record combination it was. + + This function primary handles checking #1. It collects a bitmap of + tables that are not used in select list/GROUP BY/ORDER BY/HAVING/etc and + thus can possibly be eliminated. + + SIDE EFFECTS + See the OVERVIEW section at the top of this file. - RETURN - TRUE Yes, at most one match - FALSE No */ -static bool -extra_keyuses_bind_all_keyparts(table_map bound_tables, TABLE *table, - KEYUSE *key_start, KEYUSE *key_end, - uint n_keyuses, table_map bound_parts) +void eliminate_tables(JOIN *join) { - /* - We need - - some 'unusable' KEYUSE elements to work on - - some keyparts to be already bound to start inferences: - */ - if (n_keyuses && bound_parts) + THD* thd= join->thd; + Item *item; + table_map used_tables; + DBUG_ENTER("eliminate_tables"); + + DBUG_ASSERT(join->eliminated_tables == 0); + + /* If there are no outer joins, we have nothing to eliminate: */ + if (!join->outer_join) + DBUG_VOID_RETURN; + + /* Find the tables that are referred to from WHERE/HAVING */ + used_tables= (join->conds? join->conds->used_tables() : 0) | + (join->having? join->having->used_tables() : 0); + + /* Add tables referred to from the select list */ + List_iterator<Item> it(join->fields_list); + while ((item= it++)) + used_tables |= item->used_tables(); + + /* Add tables referred to from ORDER BY and GROUP BY lists */ + ORDER *all_lists[]= { join->order, join->group_list}; + for (int i=0; i < 2; i++) { - KEY *keyinfo= table->key_info + key_start->key; - bool bound_more_parts; - do + for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next) + used_tables |= (*(cur_list->item))->used_tables(); + } + + if (join->select_lex == &thd->lex->select_lex) + { + /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */ + used_tables |= thd->table_map_for_update; + + /* Multi-table UPDATE: don't eliminate tables referred from SET statement */ + if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI) { - bound_more_parts= FALSE; - for (KEYUSE *k= key_start; k!=key_end; k++) + List_iterator<Item> it2(thd->lex->value_list); + while ((item= it2++)) + used_tables |= item->used_tables(); + } + } + + table_map all_tables= join->all_tables_map(); + if (all_tables & ~used_tables) + { + /* There are some tables that we probably could eliminate. Try it. */ + Table_elimination te(join); + uint m= max(thd->lex->current_select->max_equal_elems,1); + uint max_elems= ((thd->lex->current_select->cond_count+1)*2 + + thd->lex->current_select->between_count)*m + 1 + 10; + if (!(te.equality_deps= new Equality_dep[max_elems])) + DBUG_VOID_RETURN; + Equality_dep *eq_deps_end= te.equality_deps; + table_map eliminable_tables= 0; + collect_funcdeps_for_join_list(&te, join->join_list, + FALSE, + used_tables, + &eliminable_tables, + &eq_deps_end); + te.n_equality_deps= eq_deps_end - te.equality_deps; + Func_dep *bound_dep; + setup_equality_deps(&te, &bound_dep); + + /* + Run the wave. + All Func_dep-derived objects are divided into three classes: + - Those that have bound=FALSE + - Those that have bound=TRUE + - Those that have bound=TRUE and are in the list.. + + */ + while (bound_dep) + { + Func_dep *next= bound_dep->next; + //e= list.remove_first(); + switch (bound_dep->type) { - if (k->type == KEYUSE_UNKNOWN) + case Func_dep::FD_EXPRESSION: { - Field_processor_info fp= {table, k->key, k->keypart, 0, 0}; - if (k->val->walk(&Item::check_column_usage_processor, FALSE, - (uchar*)&fp)) - k->type= KEYUSE_NO_BIND; - else + /* It's a field=expr and we got to know the expr, so we know the field */ + Equality_dep *eq_dep= (Equality_dep*)bound_dep; + if (!eq_dep->field->bound) { - k->used_tables= fp.used_tables; - k->keypart_map= fp.needed_key_parts; - k->type= KEYUSE_BIND; + /* Mark as bound and add to the list */ + eq_dep->field->bound= TRUE; + eq_dep->field->next= next; + next= eq_dep->field; } + break; } - - if (k->type == KEYUSE_BIND) + case Func_dep::FD_FIELD: { /* - If this is a binding keyuse, such that - - all tables it refers to are bound, - - all parts it refers to are bound - - but the key part it binds is not itself bound + Field became known. Check out + - unique keys we belong to + - expressions that depend on us. */ - if (!(k->used_tables & ~bound_tables) && - !(k->keypart_map & ~bound_parts) && - !(bound_parts & key_part_map(1) << k->keypart)) + Field_dep *field_dep= (Field_dep*)bound_dep; + for (Key_dep *key_dep= field_dep->table->keys; key_dep; + key_dep= key_dep->next_table_key) + { + DBUG_PRINT("info", ("key %s.%s is now bound", + key_dep->table->table->alias, + key_dep->table->table->key_info[key_dep->keyno].name)); + if (!key_dep->bound) + { + if (!--key_dep->n_missing_keyparts) + { + /* Mark as bound and add to the list */ + key_dep->bound= TRUE; + key_dep->next= next; + next= key_dep; + } + } + } + + /* Now, expressions */ + for (uint i=0; i < te.n_equality_deps; i++) { - bound_parts|= key_part_map(1) << k->keypart; - if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts)) - return TRUE; - bound_more_parts= TRUE; + if (bitmap_is_set(&te.expr_deps, field_dep->bitmap_offset + i)) + { + Equality_dep* eq_dep= &te.equality_deps[i]; + if (!--eq_dep->unknown_args) + { + /* Mark as bound and add to the list */ + eq_dep->bound= TRUE; + eq_dep->next= next; + next= eq_dep; + } + } } + break; } + case Func_dep::FD_UNIQUE_KEY: + { + /* Unique key is known means the table is known */ + Table_dep *table_dep=((Key_dep*)bound_dep)->table; + if (!table_dep->bound) + { + /* Mark as bound and add to the list */ + table_dep->bound= TRUE; + table_dep->next= next; + next= table_dep; + } + break; + } + case Func_dep::FD_TABLE: + { + Table_dep *table_dep=(Table_dep*)bound_dep; + DBUG_PRINT("info", ("table %s is now bound", + table_dep->table->alias)); + /* + Table is known means + - all its fields are known + - one more element in outer join nest is known + */ + for (Field_dep *field_dep= table_dep->fields; field_dep; + field_dep= field_dep->next_table_field) + { + if (!field_dep->bound) + { + /* Mark as bound and add to the list */ + field_dep->bound= TRUE; + field_dep->next= next; + next= field_dep; + } + } + Outer_join_dep *outer_join_dep= table_dep->outer_join_dep; + if (!(outer_join_dep->missing_tables &= ~table_dep->table->map)) + { + /* Mark as bound and add to the list */ + outer_join_dep->bound= TRUE; + outer_join_dep->next= next; + next= outer_join_dep; + } + break; + } + case Func_dep::FD_OUTER_JOIN: + { + Outer_join_dep *outer_join_dep= (Outer_join_dep*)bound_dep; + /* TODO what do here? Stop if eliminated the top-level? */ + mark_as_eliminated(te.join, outer_join_dep->table_list); + Outer_join_dep *parent= outer_join_dep->parent; + if (parent && + !(parent->missing_tables &= ~outer_join_dep->all_tables)) + { + /* Mark as bound and add to the list */ + parent->bound= TRUE; + parent->next= next; + next= parent; + } + break; + } + case Func_dep::FD_MULTI_EQUALITY: + default: + DBUG_ASSERT(0); } - } while (bound_more_parts); + bound_dep= next; + } } - return FALSE; + DBUG_VOID_RETURN; } @@ -488,6 +1178,96 @@ static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl) tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL); } + + +#ifndef DBUG_OFF +static +void dbug_print_deps(Table_elimination *te) +{ + DBUG_ENTER("dbug_print_deps"); + DBUG_LOCK_FILE; + + fprintf(DBUG_FILE,"deps {\n"); + + /* Start with printing equalities */ + for (Equality_dep *eq_dep= te->equality_deps; + eq_dep != te->equality_deps + te->n_equality_deps; eq_dep++) + { + char buf[128]; + String str(buf, sizeof(buf), &my_charset_bin); + str.length(0); + eq_dep->val->print(&str, QT_ORDINARY); + fprintf(DBUG_FILE, " equality%d: %s -> %s.%s\n", + eq_dep - te->equality_deps, + str.c_ptr(), + eq_dep->field->table->table->alias, + eq_dep->field->field->field_name); + } + fprintf(DBUG_FILE,"\n"); + + /* Then tables and their fields */ + for (uint i=0; i < MAX_TABLES; i++) + { + Table_dep *table_dep; + if ((table_dep= te->table_deps[i])) + { + /* Print table */ + fprintf(DBUG_FILE, " table %s\n", table_dep->table->alias); + /* Print fields */ + for (Field_dep *field_dep= table_dep->fields; field_dep; + field_dep= field_dep->next_table_field) + { + fprintf(DBUG_FILE, " field %s.%s ->", table_dep->table->alias, + field_dep->field->field_name); + uint ofs= field_dep->bitmap_offset; + for (uint bit= ofs; bit < ofs + te->n_equality_deps; bit++) + { + if (bitmap_is_set(&te->expr_deps, bit)) + fprintf(DBUG_FILE, " equality%d ", bit - ofs); + } + fprintf(DBUG_FILE, "\n"); + } + } + } + fprintf(DBUG_FILE,"\n}\n"); + DBUG_UNLOCK_FILE; + DBUG_VOID_RETURN; +} + +#endif + +/***********************************************************************************************/ + +#if 0 +static void dbug_print_fdep(FUNC_DEP *fd) +{ + switch (fd->type) { + case FUNC_DEP::FD_OUTER_JOIN: + { + fprintf(DBUG_FILE, "outer_join("); + if (fd->table_list->nested_join) + { + bool first= TRUE; + List_iterator<TABLE_LIST> it(fd->table_list->nested_join->join_list); + TABLE_LIST *tbl; + while ((tbl= it++)) + { + fprintf(DBUG_FILE, "%s%s", first?"":" ", + tbl->table? tbl->table->alias : "..."); + first= FALSE; + } + fprintf(DBUG_FILE, ")"); + } + else + fprintf(DBUG_FILE, "%s", fd->table_list->table->alias); + fprintf(DBUG_FILE, ")"); + break; + } + } +} + +#endif + /** @} (end of group Table_Elimination) */ diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h index 97accefe8aa..e07806a56ab 100644 --- a/sql/sql_bitmap.h +++ b/sql/sql_bitmap.h @@ -93,6 +93,34 @@ public: } }; +/* An iterator to quickly walk over bits in unlonglong bitmap. */ +class Table_map_iterator +{ + ulonglong bmp; + uint no; +public: + Table_map_iterator(ulonglong t) : bmp(t), no(0) {} + int next_bit() + { + static const char last_bit[16]= {32, 0, 1, 0, + 2, 0, 1, 0, + 3, 0, 1, 0, + 2, 0, 1, 0}; + uint bit; + while ((bit= last_bit[bmp & 0xF]) == 32) + { + no += 4; + bmp= bmp >> 4; + if (!bmp) + return BITMAP_END; + } + bmp &= ~(1LL << bit); + return no + bit; + } + int operator++(int) { return next_bit(); } + enum { BITMAP_END= 64 }; +}; + template <> class Bitmap<64> { ulonglong map; @@ -136,5 +164,10 @@ public: my_bool operator==(const Bitmap<64>& map2) const { return map == map2.map; } char *print(char *buf) const { longlong2str(map,buf,16); return buf; } ulonglong to_ulonglong() const { return map; } + class Iterator : public Table_map_iterator + { + public: + Iterator(Bitmap<64> &bmp) : Table_map_iterator(bmp.map) {} + }; }; |