diff options
Diffstat (limited to 'sql/sql_select.h')
-rw-r--r-- | sql/sql_select.h | 222 |
1 files changed, 199 insertions, 23 deletions
diff --git a/sql/sql_select.h b/sql/sql_select.h index f0eda9671cb..abb9a98030e 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -165,13 +165,25 @@ enum enum_nested_loop_state typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); + +/* + Function prototype for reading first record for a join tab + + RETURN + 0 - OK + -1 - Record not found + Other - A fatal error +*/ typedef int (*Read_record_func)(struct st_join_table *tab); + Next_select_func setup_end_select_func(JOIN *join); int rr_sequential(READ_RECORD *info); +int rr_sequential_and_unpack(READ_RECORD *info); class JOIN_CACHE; class SJ_TMP_TABLE; +class JOIN_TAB_RANGE; typedef struct st_join_table { st_join_table() {} /* Remove gcc warning */ @@ -200,6 +212,21 @@ typedef struct st_join_table { st_join_table *last_inner; /**< last table table for embedding outer join */ st_join_table *first_upper; /**< first inner table for embedding outer join */ st_join_table *first_unmatched; /**< used for optimization purposes only */ + + /* + For join tabs that are inside an SJM bush: root of the bush + */ + st_join_table *bush_root_tab; + + /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ + bool last_leaf_in_bush; + + /* + ptr - this is a bush, and ptr points to description of child join_tab + range + NULL - this join tab has no bush children + */ + JOIN_TAB_RANGE *bush_children; /* Special content for EXPLAIN 'Extra' column or NULL if none */ const char *info; @@ -237,7 +264,13 @@ typedef struct st_join_table { method (but not 'index' for some reason), i.e. this matches method which E(#records) is in found_records. */ - ha_rows read_time; + double read_time; + + /* psergey-todo: make the below have type double, like POSITION::records_read? */ + ha_rows records_read; + + /* Startup cost for execution */ + double startup_cost; double partial_join_cardinality; @@ -330,11 +363,13 @@ typedef struct st_join_table { /* Semi-join strategy to be used for this join table. This is a copy of POSITION::sj_strategy field. This field is set up by the - fix_semijion_strategies_for_picked_join_order. + fix_semijoin_strategies_for_picked_join_order. */ uint sj_strategy; - struct st_join_table *first_sjm_sibling; + uint n_sj_tables; + + bool preread_init_done; void cleanup(); inline bool is_using_loose_index_scan() @@ -441,6 +476,8 @@ typedef struct st_join_table { { return (is_hash_join_key_no(key) ? hj_key : table->key_info+key); } + double scan_time(); + bool preread_init(); } JOIN_TAB; @@ -450,9 +487,6 @@ enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool end_of_records); -enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, - bool end_of_records); - enum_nested_loop_state end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); @@ -554,6 +588,8 @@ typedef struct st_position */ table_map firstmatch_need_tables; + bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); } + void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; } /* Duplicate Weedout strategy */ /* The first table that the strategy will need to handle */ @@ -600,26 +636,99 @@ inline bool sj_is_materialize_strategy(uint strategy) return strategy >= SJ_OPT_MATERIALIZE; } +class JOIN_TAB_RANGE: public Sql_alloc +{ +public: + JOIN_TAB *start; + JOIN_TAB *end; +}; + class JOIN :public Sql_alloc { +private: JOIN(const JOIN &rhs); /**< not implemented */ JOIN& operator=(const JOIN &rhs); /**< not implemented */ + +protected: + + /** + The subset of the state of a JOIN that represents an optimized query + execution plan. Allows saving/restoring different JOIN plans for the same + query. + */ + class Join_plan_state { + public: + DYNAMIC_ARRAY keyuse; /* Copy of the JOIN::keyuse array. */ + POSITION best_positions[MAX_TABLES+1]; /* Copy of JOIN::best_positions */ + /* Copies of the JOIN_TAB::keyuse pointers for each JOIN_TAB. */ + KEYUSE *join_tab_keyuse[MAX_TABLES]; + /* Copies of JOIN_TAB::checked_keys for each JOIN_TAB. */ + key_map join_tab_checked_keys[MAX_TABLES]; + public: + Join_plan_state() + { + keyuse.elements= 0; + keyuse.buffer= NULL; + } + Join_plan_state(JOIN *join); + ~Join_plan_state() + { + delete_dynamic(&keyuse); + } + }; + + /* Results of reoptimizing a JOIN via JOIN::reoptimize(). */ + enum enum_reopt_result { + REOPT_NEW_PLAN, /* there is a new reoptimized plan */ + REOPT_OLD_PLAN, /* no new improved plan can be found, use the old one */ + REOPT_ERROR, /* an irrecovarable error occured during reoptimization */ + REOPT_NONE /* not yet reoptimized */ + }; + + /* Support for plan reoptimization with rewritten conditions. */ + enum_reopt_result reoptimize(Item *added_where, table_map join_tables, + Join_plan_state *save_to); + void save_query_plan(Join_plan_state *save_to); + void restore_query_plan(Join_plan_state *restore_from); + /* Choose a subquery plan for a table-less subquery. */ + bool choose_tableless_subquery_plan(); + public: - JOIN_TAB *join_tab,**best_ref; + JOIN_TAB *join_tab, **best_ref; JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution + + List<JOIN_TAB_RANGE> join_tab_ranges; + + /* + Base tables participating in the join. After join optimization is done, the + tables are stored in the join order (but the only really important part is + that const tables are first). + */ TABLE **table; - TABLE **all_tables; /** The table which has an index that allows to produce the requried ordering. A special value of 0x1 means that the ordering will be produced by passing 1st non-const table to filesort(). NULL means no such table exists. */ TABLE *sort_by_table; - uint tables; /**< Number of tables in the join */ + /* + Number of tables in the join. + (In MySQL, it is named 'tables' and is also the number of elements in + join->join_tab array. In MariaDB, the latter is not true, so we've renamed + the variable) + */ + uint table_count; uint outer_tables; /**< Number of tables that are not inside semijoin */ uint const_tables; + /* + Number of tables in the top join_tab array. Normally this matches + (join_tab_ranges.head()->end - join_tab_ranges.head()->start). + + We keep it here so that it is saved/restored with JOIN::restore_tmp. + */ + uint top_join_tab_count; uint send_group_parts; bool group; /**< If query contains GROUP BY clause */ /** @@ -646,7 +755,8 @@ public: /* Tables removed by table elimination. Set to 0 before the elimination. */ table_map eliminated_tables; /* - Bitmap of all inner tables from outer joins + Bitmap of all inner tables from outer joins (set at start of + make_join_statistics) */ table_map outer_join; ha_rows send_records,found_records,examined_rows,row_limit, select_limit; @@ -695,13 +805,19 @@ public: /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ - Next_select_func first_select; /* The cost of best complete join plan found so far during optimization, after optimization phase - cost of picked join order (not taking into account the changes made by test_if_skip_sort_order()). */ double best_read; + /* + Estimated result rows (fanout) of the join operation. If this is a subquery + that is reexecuted multiple times, this value includes the estiamted # of + reexecutions. This value is equal to the multiplication of all + join->positions[i].records_read of a JOIN. + */ + double record_count; List<Item> *fields; List<Cached_item> group_fields, group_fields_cache; TABLE *tmp_table; @@ -815,6 +931,19 @@ public: List<TABLE_LIST> *join_list; ///< list of joined tables in reverse order COND_EQUAL *cond_equal; COND_EQUAL *having_equal; + /* + Constant codition computed during optimization, but evaluated during + join execution. Typically expensive conditions that should not be + evaluated at optimization time. + */ + Item *exec_const_cond; + /* + Constant ORDER and/or GROUP expressions that contain subqueries. Such + expressions need to evaluated to verify that the subquery indeed + returns a single row. The evaluation of such expressions is delayed + until query execution. + */ + List<Item> exec_const_order_group_cond; SQL_SELECT *select; ///<created in optimisation phase JOIN_TAB *return_tab; ///<used only for outer joins Item **ref_pointer_array; ///<used pointer reference for this select @@ -825,9 +954,20 @@ public: bool union_part; ///< this subselect is part of union bool optimized; ///< flag to avoid double optimization in EXPLAIN + bool initialized; ///< flag to avoid double init_execution calls + /* + Subqueries that will need to be converted to semi-join nests, including + those converted to jtbm nests. The list is emptied when conversion is done. + */ Array<Item_in_subselect> sj_subselects; - + /* + Additional WHERE and HAVING predicates to be considered for IN=>EXISTS + subquery transformation of a JOIN object. + */ + Item *in_to_exists_where; + Item *in_to_exists_having; + /* Temporary tables used to weed-out semi-join duplicates */ List<TABLE> sj_tmp_tables; /* SJM nests that are executed with SJ-Materialization strategy */ @@ -860,7 +1000,8 @@ public: { join_tab= join_tab_save= 0; table= 0; - tables= 0; + table_count= 0; + top_join_tab_count= 0; const_tables= 0; eliminated_tables= 0; join_list= 0; @@ -901,8 +1042,10 @@ public: ref_pointer_array_size= 0; zero_result_cause= 0; optimized= 0; + initialized= 0; cond_equal= 0; having_equal= 0; + exec_const_cond= 0; group_optimized_away= 0; no_rows_in_result_called= 0; @@ -915,22 +1058,25 @@ public: rollup.state= ROLLUP::STATE_NONE; no_const_tables= FALSE; - first_select= sub_select; outer_ref_cond= 0; + in_to_exists_where= NULL; + in_to_exists_having= NULL; } int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num, COND *conds, uint og_num, ORDER *order, ORDER *group, Item *having, ORDER *proc_param, SELECT_LEX *select, SELECT_LEX_UNIT *unit); + bool prepare_stage2(); int optimize(); int reinit(); + int init_execution(); void exec(); int destroy(); void restore_tmp(); bool alloc_func_list(); bool flatten_subqueries(); - bool setup_subquery_materialization(); + bool optimize_unflattened_subqueries(); bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields, bool before_group_by, bool recompute= FALSE); @@ -966,7 +1112,7 @@ public: bool init_save_join_tab(); bool send_row_on_empty_set() { - return (do_send_rows && implicit_grouping && + return (do_send_rows && implicit_grouping && !group_optimized_away && having_value != Item::COND_FALSE); } bool change_result(select_result *result); @@ -977,8 +1123,9 @@ public: } inline table_map all_tables_map() { - return (table_map(1) << tables) - 1; + return (table_map(1) << table_count) - 1; } + void drop_unused_derived_keys(); /* Return the table for which an index scan can be used to satisfy the sort order needed by the ORDER BY/(implicit) GROUP BY clause @@ -999,7 +1146,21 @@ public: return test(allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && max_allowed_join_cache_level > JOIN_CACHE_HASHED_BIT; } - + bool choose_subquery_plan(table_map join_tables); + void get_partial_cost_and_fanout(uint end_tab_idx, + table_map filter_map, + double *read_time_arg, + double *record_count_arg); + void get_prefix_cost_and_fanout(uint n_tables, + double *read_time_arg, + double *record_count_arg); + /* defined in opt_subselect.cc */ + bool transform_max_min_subquery(); + /* True if this JOIN is a subquery under an IN predicate. */ + bool is_in_subquery() + { + return (unit->item && unit->item->is_in_predicate()); + } private: /** TRUE if the query contains an aggregate function but has no GROUP @@ -1010,6 +1171,15 @@ private: void cleanup_item_list(List<Item> &items) const; }; +enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS}; +enum enum_with_const_tables { WITH_CONST_TABLES, WITHOUT_CONST_TABLES}; + +JOIN_TAB *first_linear_tab(JOIN *join, enum enum_with_const_tables const_tbls); +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, + enum enum_with_bush_roots include_bush_roots); + +JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const); +JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab); typedef struct st_select_check { uint const_ref,reg_ref; @@ -1037,7 +1207,8 @@ Field* create_tmp_field_from_field(THD *thd, Field* org_field, /* functions from opt_sum.cc */ bool simple_pred(Item_func *func_item, Item **args, bool *inv_order); -int opt_sum_query(TABLE_LIST *tables, List<Item> &all_fields,COND *conds); +int opt_sum_query(THD* thd, + List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds); /* from sql_delete.cc, used by opt_range.cc */ extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); @@ -1233,6 +1404,9 @@ protected: if (!inited) { inited=1; + TABLE *table= to_field->table; + my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, + table->write_set); if ((res= item->save_in_field(to_field, 1))) { if (!err) @@ -1244,6 +1418,7 @@ protected: */ if (!err && to_field->table->in_use->is_error()) err= 1; /* STORE_KEY_FATAL */ + dbug_tmp_restore_column_map(table->write_set, old_map); } null_key= to_field->is_null() || item->null_value; return (err > 2 ? STORE_KEY_FATAL : (store_key_result) err); @@ -1257,14 +1432,13 @@ int safe_index_read(JOIN_TAB *tab); COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value); int test_if_item_cache_changed(List<Cached_item> &list); int join_init_read_record(JOIN_TAB *tab); +int join_read_record_no_init(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline Item * and_items(Item* cond, Item *item) { return (cond? (new Item_cond_and(cond, item)) : item); } -bool choose_plan(JOIN *join,table_map join_tables); -void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg, - double *record_count_arg); +bool choose_plan(JOIN *join, table_map join_tables); void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, table_map last_remaining_tables, bool first_alt, uint no_jbuf_before, @@ -1297,7 +1471,7 @@ void push_index_cond(JOIN_TAB *tab, uint keyno); TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, ORDER *group, bool distinct, bool save_sum_fields, ulonglong select_options, ha_rows rows_limit, - char* alias); + char* alias, bool do_not_open=FALSE); void free_tmp_table(THD *thd, TABLE *entry); bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table, ENGINE_COLUMNDEF *start_recinfo, @@ -1309,5 +1483,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, ulonglong options); bool open_tmp_table(TABLE *table); void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps); +double prev_record_reads(POSITION *positions, uint idx, table_map found_ref); +void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); #endif /* SQL_SELECT_INCLUDED */ |