diff options
Diffstat (limited to 'sql/sql_select.h')
-rw-r--r-- | sql/sql_select.h | 330 |
1 files changed, 234 insertions, 96 deletions
diff --git a/sql/sql_select.h b/sql/sql_select.h index 1b1bb6ded71..59f29239f5e 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -32,7 +32,9 @@ #include "sql_array.h" /* Array */ #include "records.h" /* READ_RECORD */ #include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */ +#include "filesort.h" +typedef struct st_join_table JOIN_TAB; /* Values in optimize */ #define KEY_OPTIMIZE_EXISTS 1 @@ -184,7 +186,7 @@ enum sj_strategy_enum typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); -Next_select_func setup_end_select_func(JOIN *join); +Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); int rr_sequential(READ_RECORD *info); int rr_sequential_and_unpack(READ_RECORD *info); @@ -198,9 +200,11 @@ int rr_sequential_and_unpack(READ_RECORD *info); class JOIN_CACHE; class SJ_TMP_TABLE; class JOIN_TAB_RANGE; +class AGGR_OP; +class Filesort; typedef struct st_join_table { - st_join_table() {} /* Remove gcc warning */ + st_join_table() {} TABLE *table; KEYUSE *keyuse; /**< pointer to first used key */ KEY *hj_key; /**< descriptor of the used best hash join key @@ -260,6 +264,7 @@ typedef struct st_join_table { */ uint packed_info; + // READ_RECORD::Setup_func materialize_table; READ_RECORD::Setup_func read_first_record; Next_select_func next_select; READ_RECORD read_record; @@ -346,6 +351,7 @@ typedef struct st_join_table { */ Item *cache_idx_cond; SQL_SELECT *cache_select; + AGGR_OP *aggr; JOIN *join; /* Embedding SJ-nest (may be not the direct parent), or NULL if none. @@ -412,6 +418,39 @@ typedef struct st_join_table { /* NestedOuterJoins: Bitmap of nested joins this table is part of */ nested_join_map embedding_map; + /* Tmp table info */ + TMP_TABLE_PARAM *tmp_table_param; + + /* Sorting related info */ + Filesort *filesort; + + /** + List of topmost expressions in the select list. The *next* JOIN TAB + in the plan should use it to obtain correct values. Same applicable to + all_fields. These lists are needed because after tmp tables functions + will be turned to fields. These variables are pointing to + tmp_fields_list[123]. Valid only for tmp tables and the last non-tmp + table in the query plan. + @see JOIN::make_tmp_tables_info() + */ + List<Item> *fields; + /** List of all expressions in the select list */ + List<Item> *all_fields; + /* + Pointer to the ref array slice which to switch to before sending + records. Valid only for tmp tables. + */ + Ref_ptr_array *ref_array; + + /** Number of records saved in tmp table */ + ha_rows send_records; + + /** HAVING condition for checking prior saving a record into tmp table*/ + Item *having; + + /** TRUE <=> remove duplicates on this table. */ + bool distinct; + /* Semi-join strategy to be used for this join table. This is a copy of POSITION::sj_strategy field. This field is set up by the @@ -426,9 +465,9 @@ typedef struct st_join_table { void cleanup(); inline bool is_using_loose_index_scan() { - return (select && select->quick && - (select->quick->get_type() == - QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)); + const SQL_SELECT *sel= filesort ? filesort->select : select; + return (sel && sel->quick && + (sel->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)); } bool is_using_agg_loose_index_scan () { @@ -563,16 +602,22 @@ typedef struct st_join_table { void save_explain_data(Explain_table_access *eta, table_map prefix_tables, bool distinct, struct st_join_table *first_top_tab); - void update_explain_data(uint idx); + bool use_order() const; ///< Use ordering provided by chosen index? + bool sort_table(); + bool remove_duplicates(); + } JOIN_TAB; #include "sql_join_cache.h" -enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool - end_of_records); -enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool - end_of_records); +enum_nested_loop_state +sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + enum_nested_loop_state end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); @@ -867,12 +912,14 @@ typedef struct st_position Sj_materialization_picker sjmat_picker; } POSITION; +typedef Bounds_checked_array<Item_null_result*> Item_null_array; + typedef struct st_rollup { enum State { STATE_NONE, STATE_INITED, STATE_READY }; State state; - Item_null_result **null_items; - Item ***ref_pointer_arrays; + Item_null_array null_items; + Ref_ptr_array *ref_pointer_arrays; List<Item> *fields; } ROLLUP; @@ -886,6 +933,56 @@ public: class Pushdown_query; +/** + @brief + Class to perform postjoin aggregation operations + + @details + The result records are obtained on the put_record() call. + The aggrgation process is determined by the write_func, it could be: + end_write Simply store all records in tmp table. + end_write_group Perform grouping using join->group_fields, + records are expected to be sorted. + end_update Perform grouping using the key generated on tmp + table. Input records aren't expected to be sorted. + Tmp table uses the heap engine + end_update_unique Same as above, but the engine is myisam. + + Lazy table initialization is used - the table will be instantiated and + rnd/index scan started on the first put_record() call. + +*/ + +class AGGR_OP :public Sql_alloc +{ +public: + JOIN_TAB *join_tab; + + AGGR_OP(JOIN_TAB *tab) : join_tab(tab), write_func(NULL) + {}; + + enum_nested_loop_state put_record() { return put_record(false); }; + /* + Send the result of operation further (to a next operation/client) + This function is called after all records were put into tmp table. + + @return return one of enum_nested_loop_state values. + */ + enum_nested_loop_state end_send(); + /** write_func setter */ + void set_write_func(Next_select_func new_write_func) + { + write_func= new_write_func; + } + +private: + /** Write function that would be used for saving records in tmp table. */ + Next_select_func write_func; + enum_nested_loop_state put_record(bool end_of_records); + bool prepare_tmp_table(); +}; + + class JOIN :public Sql_alloc { private: @@ -954,14 +1051,6 @@ protected: public: JOIN_TAB *join_tab, **best_ref; - - /* - Saved join_tab for pre_sorting. create_sort_index() will save here.. - */ - JOIN_TAB *pre_sort_join_tab; - uint pre_sort_index; - Item *pre_sort_idx_pushed_cond; - void clean_pre_sort_join_tab(); /* List of fields that aren't under an aggregate function */ List<Item_field> non_agg_fields; @@ -979,8 +1068,6 @@ public: uint top_table_access_tabs_count; JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs - JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution - List<JOIN_TAB_RANGE> join_tab_ranges; /* @@ -1011,6 +1098,7 @@ public: We keep it here so that it is saved/restored with JOIN::restore_tmp. */ uint top_join_tab_count; + uint aggr_tables; ///< Number of post-join tmp tables uint send_group_parts; /* This counts how many times do_select() was invoked for this JOIN. @@ -1123,6 +1211,7 @@ public: */ table_map complex_firstmatch_tables; + Next_select_func first_select; /* The cost of best complete join plan found so far during optimization, after optimization phase - cost of picked join order (not taking into @@ -1138,9 +1227,6 @@ public: double join_record_count; List<Item> *fields; List<Cached_item> group_fields, group_fields_cache; - TABLE *tmp_table; - /// used to store 2 possible tmp table of SELECT - TABLE *exec_tmp_table1, *exec_tmp_table2; THD *thd; Item_sum **sum_funcs, ***sum_funcs_end; /** second copy of sumfuncs (for queries with 2 temporary tables */ @@ -1149,6 +1235,8 @@ public: Item *having; Item *tmp_having; ///< To store having when processed temporary table Item *having_history; ///< Store having for explain + ORDER *group_list_for_estimates; + bool having_is_correlated; ulonglong select_options; /* Bitmap of allowed types of the join caches that @@ -1187,26 +1275,6 @@ public: */ bool filesort_found_rows; - /** - Copy of this JOIN to be used with temporary tables. - - tmp_join is used when the JOIN needs to be "reusable" (e.g. in a - subquery that gets re-executed several times) and we know will use - temporary tables for materialization. The materialization to a - temporary table overwrites the JOIN structure to point to the - temporary table after the materialization is done. This is where - tmp_join is used : it's a copy of the JOIN before the - materialization and is used in restoring before re-execution by - overwriting the current JOIN structure with the saved copy. - Because of this we should pay extra care of not freeing up helper - structures that are referenced by the original contents of the - JOIN. We can check for this by making sure the "current" join is - not the temporary copy, e.g. !tmp_join || tmp_join != join - - We should free these sub-structures at JOIN::destroy() if the - "current" join has a copy is not that copy. - */ - JOIN *tmp_join; ROLLUP rollup; ///< Used with rollup bool mixed_implicit_grouping; @@ -1228,6 +1296,19 @@ public: GROUP/ORDER BY. */ bool simple_order, simple_group; + + /* + ordered_index_usage is set if an ordered index access + should be used instead of a filesort when computing + ORDER/GROUP BY. + */ + enum + { + ordered_index_void, // No ordered index avail. + ordered_index_group_by, // Use index for GROUP BY + ordered_index_order_by // Use index for ORDER BY + } ordered_index_usage; + /** Is set only in case if we have a GROUP BY clause and no ORDER BY after constant elimination of 'order'. @@ -1280,10 +1361,19 @@ public: List<Item> exec_const_order_group_cond; SQL_SELECT *select; ///<created in optimisation phase JOIN_TAB *return_tab; ///<used only for outer joins - Item **ref_pointer_array; ///<used pointer reference for this select - // Copy of above to be used with different lists - Item **items0, **items1, **items2, **items3, **current_ref_pointer_array; - uint ref_pointer_array_size; ///< size of above in bytes + + /* + Used pointer reference for this select. + select_lex->ref_pointer_array contains five "slices" of the same length: + |========|========|========|========|========| + ref_ptrs items0 items1 items2 items3 + */ + Ref_ptr_array ref_ptrs; + // Copy of the initial slice above, to be used with different lists + Ref_ptr_array items0, items1, items2, items3; + // Used by rollup, to restore ref_ptrs after overwriting it. + Ref_ptr_array current_ref_ptrs; + const char *zero_result_cause; ///< not 0 if exec must return zero result bool union_part; ///< this subselect is part of union @@ -1310,20 +1400,12 @@ public: /* SJM nests that are executed with SJ-Materialization strategy */ List<SJ_MATERIALIZATION_INFO> sjm_info_list; - /* - storage for caching buffers allocated during query execution. - These buffers allocations need to be cached as the thread memory pool is - cleared only at the end of the execution of the whole query and not caching - allocations that occur in repetition at execution time will result in - excessive memory usage. - Note: make_simple_join always creates an execution plan that accesses - a single table, thus it is sufficient to have a one-element array for - table_reexec. - */ - SORT_FIELD *sortorder; // make_unireg_sortorder() - TABLE *table_reexec[1]; // make_simple_join() - JOIN_TAB *join_tab_reexec; // make_simple_join() - /* end of allocation caching storage */ + /** TRUE <=> ref_pointer_array is set to items3. */ + bool set_group_rpa; + /** Exec time only: TRUE <=> current group has been sent */ + bool group_sent; + + JOIN_TAB *sort_and_group_aggr_tab; JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) @@ -1335,12 +1417,13 @@ public: void init(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) { - join_tab= join_tab_save= 0; + join_tab= 0; table= 0; table_count= 0; top_join_tab_count= 0; const_tables= 0; const_table_map= 0; + aggr_tables= 0; eliminated_tables= 0; join_list= 0; implicit_grouping= FALSE; @@ -1350,25 +1433,21 @@ public: send_records= 0; found_records= 0; fetch_limit= HA_POS_ERROR; - join_examined_rows= 0; - exec_tmp_table1= 0; - exec_tmp_table2= 0; - sortorder= 0; - table_reexec[0]= 0; - join_tab_reexec= 0; thd= thd_arg; sum_funcs= sum_funcs2= 0; procedure= 0; having= tmp_having= having_history= 0; + having_is_correlated= false; + group_list_for_estimates= 0; select_options= select_options_arg; result= result_arg; lock= thd_arg->lock; select_lex= 0; //for safety - tmp_join= 0; select_distinct= MY_TEST(select_options & SELECT_DISTINCT); no_order= 0; simple_order= 0; simple_group= 0; + ordered_index_usage= ordered_index_void; need_distinct= 0; skip_sort_order= 0; need_tmp= 0; @@ -1376,8 +1455,11 @@ public: error= 0; select= 0; return_tab= 0; - ref_pointer_array= items0= items1= items2= items3= 0; - ref_pointer_array_size= 0; + ref_ptrs.reset(); + items0.reset(); + items1.reset(); + items2.reset(); + items3.reset(); zero_result_cause= 0; optimized= 0; have_query_plan= QEP_NOT_PRESENT_YET; @@ -1405,10 +1487,13 @@ public: rollup.state= ROLLUP::STATE_NONE; no_const_tables= FALSE; + first_select= sub_select; + set_group_rpa= false; + group_sent= 0; + outer_ref_cond= pseudo_bits_cond= NULL; in_to_exists_where= NULL; in_to_exists_having= NULL; - pre_sort_join_tab= NULL; emb_sjm_nest= NULL; sjm_lookup_tables= 0; @@ -1420,7 +1505,10 @@ public: table_access_tabs= NULL; } - int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num, + /* True if the plan guarantees that it will be returned zero or one row */ + bool only_const_tables() { return const_tables == table_count; } + + int prepare(TABLE_LIST *tables, uint wind_num, COND *conds, uint og_num, ORDER *order, bool skip_order_by, ORDER *group, Item *having, ORDER *proc_param, SELECT_LEX *select, SELECT_LEX_UNIT *unit); @@ -1431,6 +1519,7 @@ public: int init_execution(); void exec(); void exec_inner(); + bool prepare_result(List<Item> **columns_list); int destroy(); void restore_tmp(); bool alloc_func_list(); @@ -1440,16 +1529,42 @@ public: bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields, bool before_group_by, bool recompute= FALSE); - inline void set_items_ref_array(Item **ptr) + /// Initialzes a slice, see comments for ref_ptrs above. + Ref_ptr_array ref_ptr_array_slice(size_t slice_num) + { + size_t slice_sz= select_lex->ref_pointer_array.size() / 5U; + DBUG_ASSERT(select_lex->ref_pointer_array.size() % 5 == 0); + DBUG_ASSERT(slice_num < 5U); + return Ref_ptr_array(&select_lex->ref_pointer_array[slice_num * slice_sz], + slice_sz); + } + + /** + Overwrites one slice with the contents of another slice. + In the normal case, dst and src have the same size(). + However: the rollup slices may have smaller size than slice_sz. + */ + void copy_ref_ptr_array(Ref_ptr_array dst_arr, Ref_ptr_array src_arr) + { + DBUG_ASSERT(dst_arr.size() >= src_arr.size()); + void *dest= dst_arr.array(); + const void *src= src_arr.array(); + memcpy(dest, src, src_arr.size() * src_arr.element_size()); + } + + /// Overwrites 'ref_ptrs' and remembers the the source as 'current'. + void set_items_ref_array(Ref_ptr_array src_arr) { - memcpy((char*) ref_pointer_array, (char*) ptr, ref_pointer_array_size); - current_ref_pointer_array= ptr; + copy_ref_ptr_array(ref_ptrs, src_arr); + current_ref_ptrs= src_arr; } - inline void init_items_ref_array() + + /// Initializes 'items0' and remembers that it is 'current'. + void init_items_ref_array() { - items0= ref_pointer_array + all_fields.elements; - memcpy(items0, ref_pointer_array, ref_pointer_array_size); - current_ref_pointer_array= items0; + items0= ref_ptr_array_slice(1); + copy_ref_ptr_array(items0, ref_ptrs); + current_ref_ptrs= items0; } bool rollup_init(); @@ -1458,18 +1573,10 @@ public: Item_sum ***func); int rollup_send_data(uint idx); int rollup_write_data(uint idx, TABLE *table); - /** - Release memory and, if possible, the open tables held by this execution - plan (and nested plans). It's used to release some tables before - the end of execution in order to increase concurrency and reduce - memory consumption. - */ void join_free(); /** Cleanup this JOIN, possibly for reuse */ void cleanup(bool full); void clear(); - bool save_join_tab(); - bool init_save_join_tab(); bool send_row_on_empty_set() { return (do_send_rows && implicit_grouping && !group_optimized_away && @@ -1488,6 +1595,8 @@ public: return (table_map(1) << table_count) - 1; } void drop_unused_derived_keys(); + bool get_best_combination(); + bool add_sorting_to_table(JOIN_TAB *tab, ORDER *order); inline void eval_select_list_used_tables(); /* Return the table for which an index scan can be used to satisfy @@ -1553,12 +1662,41 @@ public: JOIN_TAB *first_breadth_first_execution_tab() { return join_tab; } private: /** + Create a temporary table to be used for processing DISTINCT/ORDER + BY/GROUP BY. + + @note Will modify JOIN object wrt sort/group attributes + + @param tab the JOIN_TAB object to attach created table to + @param tmp_table_fields List of items that will be used to define + column types of the table. + @param tmp_table_group Group key to use for temporary table, NULL if none. + @param save_sum_fields If true, do not replace Item_sum items in + @c tmp_fields list with Item_field items referring + to fields in temporary table. + + @returns false on success, true on failure + */ + bool create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *tmp_table_fields, + ORDER *tmp_table_group, + bool save_sum_fields, + bool distinct, + bool keep_row_ordermake); + /** + Optimize distinct when used on a subset of the tables. + + E.g.,: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b + In this case we can stop scanning t2 when we have found one t1.a + */ + void optimize_distinct(); + + /** TRUE if the query contains an aggregate function but has no GROUP BY clause. */ bool implicit_grouping; - bool make_simple_join(JOIN *join, TABLE *tmp_table); void cleanup_item_list(List<Item> &items) const; + bool make_aggr_tables_info(); }; enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS}; @@ -1583,7 +1721,7 @@ extern const char *join_type_str[]; void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, List<Item> &fields, bool reset_with_sum_func); bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, - Item **ref_pointer_array, + Ref_ptr_array ref_pointer_array, List<Item> &new_list1, List<Item> &new_list2, uint elements, List<Item> &fields); void copy_fields(TMP_TABLE_PARAM *param); @@ -1824,19 +1962,19 @@ int safe_index_read(JOIN_TAB *tab); int get_quick_record(SQL_SELECT *select); SORT_FIELD * make_unireg_sortorder(THD *thd, ORDER *order, uint *length, SORT_FIELD *sortorder); -int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, +int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, List<Item> &fields, List <Item> &all_fields, ORDER *order); -int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, +int setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, List<Item> &fields, List<Item> &all_fields, ORDER *order, bool *hidden_group_fields); bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, - Item **ref_pointer_array); + Ref_ptr_array ref_pointer_array); int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table, struct st_table_ref *table_ref); bool handle_select(THD *thd, LEX *lex, select_result *result, ulong setup_tables_done_option); -bool mysql_select(THD *thd, Item ***rref_pointer_array, +bool mysql_select(THD *thd, TABLE_LIST *tables, uint wild_num, List<Item> &list, COND *conds, uint og_num, ORDER *order, ORDER *group, Item *having, ORDER *proc_param, ulonglong select_type, |