summaryrefslogtreecommitdiff
path: root/sql/sql_select.h
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_select.h')
-rw-r--r--sql/sql_select.h330
1 files changed, 234 insertions, 96 deletions
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 1b1bb6ded71..59f29239f5e 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -32,7 +32,9 @@
#include "sql_array.h" /* Array */
#include "records.h" /* READ_RECORD */
#include "opt_range.h" /* SQL_SELECT, QUICK_SELECT_I */
+#include "filesort.h"
+typedef struct st_join_table JOIN_TAB;
/* Values in optimize */
#define KEY_OPTIMIZE_EXISTS 1
@@ -184,7 +186,7 @@ enum sj_strategy_enum
typedef enum_nested_loop_state
(*Next_select_func)(JOIN *, struct st_join_table *, bool);
-Next_select_func setup_end_select_func(JOIN *join);
+Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
int rr_sequential(READ_RECORD *info);
int rr_sequential_and_unpack(READ_RECORD *info);
@@ -198,9 +200,11 @@ int rr_sequential_and_unpack(READ_RECORD *info);
class JOIN_CACHE;
class SJ_TMP_TABLE;
class JOIN_TAB_RANGE;
+class AGGR_OP;
+class Filesort;
typedef struct st_join_table {
- st_join_table() {} /* Remove gcc warning */
+ st_join_table() {}
TABLE *table;
KEYUSE *keyuse; /**< pointer to first used key */
KEY *hj_key; /**< descriptor of the used best hash join key
@@ -260,6 +264,7 @@ typedef struct st_join_table {
*/
uint packed_info;
+ // READ_RECORD::Setup_func materialize_table;
READ_RECORD::Setup_func read_first_record;
Next_select_func next_select;
READ_RECORD read_record;
@@ -346,6 +351,7 @@ typedef struct st_join_table {
*/
Item *cache_idx_cond;
SQL_SELECT *cache_select;
+ AGGR_OP *aggr;
JOIN *join;
/*
Embedding SJ-nest (may be not the direct parent), or NULL if none.
@@ -412,6 +418,39 @@ typedef struct st_join_table {
/* NestedOuterJoins: Bitmap of nested joins this table is part of */
nested_join_map embedding_map;
+ /* Tmp table info */
+ TMP_TABLE_PARAM *tmp_table_param;
+
+ /* Sorting related info */
+ Filesort *filesort;
+
+ /**
+ List of topmost expressions in the select list. The *next* JOIN_TAB
+ in the plan should use it to obtain correct values. The same applies to
+ all_fields. These lists are needed because, after tmp tables, functions
+ will be turned into fields. These variables point to
+ tmp_fields_list[123]. Valid only for tmp tables and the last non-tmp
+ table in the query plan.
+ @see JOIN::make_tmp_tables_info()
+ */
+ List<Item> *fields;
+ /** List of all expressions in the select list */
+ List<Item> *all_fields;
+ /*
+ Pointer to the ref array slice to switch to before sending
+ records. Valid only for tmp tables.
+ */
+ Ref_ptr_array *ref_array;
+
+ /** Number of records saved in tmp table */
+ ha_rows send_records;
+
+ /** HAVING condition to check prior to saving a record into tmp table */
+ Item *having;
+
+ /** TRUE <=> remove duplicates on this table. */
+ bool distinct;
+
/*
Semi-join strategy to be used for this join table. This is a copy of
POSITION::sj_strategy field. This field is set up by the
@@ -426,9 +465,9 @@ typedef struct st_join_table {
void cleanup();
inline bool is_using_loose_index_scan()
{
- return (select && select->quick &&
- (select->quick->get_type() ==
- QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX));
+ const SQL_SELECT *sel= filesort ? filesort->select : select;
+ return (sel && sel->quick &&
+ (sel->quick->get_type() == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX));
}
bool is_using_agg_loose_index_scan ()
{
@@ -563,16 +602,22 @@ typedef struct st_join_table {
void save_explain_data(Explain_table_access *eta, table_map prefix_tables,
bool distinct, struct st_join_table *first_top_tab);
- void update_explain_data(uint idx);
+ bool use_order() const; ///< Use ordering provided by chosen index?
+ bool sort_table();
+ bool remove_duplicates();
+
} JOIN_TAB;
#include "sql_join_cache.h"
-enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool
- end_of_records);
-enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool
- end_of_records);
+enum_nested_loop_state
+sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+enum_nested_loop_state
+sub_select(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+enum_nested_loop_state
+sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
+
enum_nested_loop_state
end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records);
@@ -867,12 +912,14 @@ typedef struct st_position
Sj_materialization_picker sjmat_picker;
} POSITION;
+typedef Bounds_checked_array<Item_null_result*> Item_null_array;
+
typedef struct st_rollup
{
enum State { STATE_NONE, STATE_INITED, STATE_READY };
State state;
- Item_null_result **null_items;
- Item ***ref_pointer_arrays;
+ Item_null_array null_items;
+ Ref_ptr_array *ref_pointer_arrays;
List<Item> *fields;
} ROLLUP;
@@ -886,6 +933,56 @@ public:
class Pushdown_query;
+/**
+ @brief
+ Class to perform postjoin aggregation operations
+
+ @details
+ The result records are obtained on the put_record() call.
+ The aggregation process is determined by the write_func, it could be:
+ end_write Simply store all records in tmp table.
+ end_write_group Perform grouping using join->group_fields,
+ records are expected to be sorted.
+ end_update Perform grouping using the key generated on tmp
+ table. Input records aren't expected to be sorted.
+ Tmp table uses the heap engine
+ end_update_unique Same as above, but the engine is myisam.
+
+ Lazy table initialization is used - the table will be instantiated and
+ rnd/index scan started on the first put_record() call.
+
+*/
+
+class AGGR_OP :public Sql_alloc
+{
+public:
+ JOIN_TAB *join_tab;
+
+ AGGR_OP(JOIN_TAB *tab) : join_tab(tab), write_func(NULL)
+ {};
+
+ enum_nested_loop_state put_record() { return put_record(false); };
+ /*
+ Send the result of operation further (to the next operation/client)
+ This function is called after all records were put into tmp table.
+
+ @return return one of enum_nested_loop_state values.
+ */
+ enum_nested_loop_state end_send();
+ /** write_func setter */
+ void set_write_func(Next_select_func new_write_func)
+ {
+ write_func= new_write_func;
+ }
+
+private:
+ /** Write function that would be used for saving records in tmp table. */
+ Next_select_func write_func;
+ enum_nested_loop_state put_record(bool end_of_records);
+ bool prepare_tmp_table();
+};
+
+
class JOIN :public Sql_alloc
{
private:
@@ -954,14 +1051,6 @@ protected:
public:
JOIN_TAB *join_tab, **best_ref;
-
- /*
- Saved join_tab for pre_sorting. create_sort_index() will save here..
- */
- JOIN_TAB *pre_sort_join_tab;
- uint pre_sort_index;
- Item *pre_sort_idx_pushed_cond;
- void clean_pre_sort_join_tab();
/* List of fields that aren't under an aggregate function */
List<Item_field> non_agg_fields;
@@ -979,8 +1068,6 @@ public:
uint top_table_access_tabs_count;
JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs
- JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution
-
List<JOIN_TAB_RANGE> join_tab_ranges;
/*
@@ -1011,6 +1098,7 @@ public:
We keep it here so that it is saved/restored with JOIN::restore_tmp.
*/
uint top_join_tab_count;
+ uint aggr_tables; ///< Number of post-join tmp tables
uint send_group_parts;
/*
This counts how many times do_select() was invoked for this JOIN.
@@ -1123,6 +1211,7 @@ public:
*/
table_map complex_firstmatch_tables;
+ Next_select_func first_select;
/*
The cost of best complete join plan found so far during optimization,
after optimization phase - cost of picked join order (not taking into
@@ -1138,9 +1227,6 @@ public:
double join_record_count;
List<Item> *fields;
List<Cached_item> group_fields, group_fields_cache;
- TABLE *tmp_table;
- /// used to store 2 possible tmp table of SELECT
- TABLE *exec_tmp_table1, *exec_tmp_table2;
THD *thd;
Item_sum **sum_funcs, ***sum_funcs_end;
/** second copy of sumfuncs (for queries with 2 temporary tables */
@@ -1149,6 +1235,8 @@ public:
Item *having;
Item *tmp_having; ///< To store having when processed temporary table
Item *having_history; ///< Store having for explain
+ ORDER *group_list_for_estimates;
+ bool having_is_correlated;
ulonglong select_options;
/*
Bitmap of allowed types of the join caches that
@@ -1187,26 +1275,6 @@ public:
*/
bool filesort_found_rows;
- /**
- Copy of this JOIN to be used with temporary tables.
-
- tmp_join is used when the JOIN needs to be "reusable" (e.g. in a
- subquery that gets re-executed several times) and we know will use
- temporary tables for materialization. The materialization to a
- temporary table overwrites the JOIN structure to point to the
- temporary table after the materialization is done. This is where
- tmp_join is used : it's a copy of the JOIN before the
- materialization and is used in restoring before re-execution by
- overwriting the current JOIN structure with the saved copy.
- Because of this we should pay extra care of not freeing up helper
- structures that are referenced by the original contents of the
- JOIN. We can check for this by making sure the "current" join is
- not the temporary copy, e.g. !tmp_join || tmp_join != join
-
- We should free these sub-structures at JOIN::destroy() if the
- "current" join has a copy is not that copy.
- */
- JOIN *tmp_join;
ROLLUP rollup; ///< Used with rollup
bool mixed_implicit_grouping;
@@ -1228,6 +1296,19 @@ public:
GROUP/ORDER BY.
*/
bool simple_order, simple_group;
+
+ /*
+ ordered_index_usage is set if an ordered index access
+ should be used instead of a filesort when computing
+ ORDER/GROUP BY.
+ */
+ enum
+ {
+ ordered_index_void, // No ordered index avail.
+ ordered_index_group_by, // Use index for GROUP BY
+ ordered_index_order_by // Use index for ORDER BY
+ } ordered_index_usage;
+
/**
Is set only in case if we have a GROUP BY clause
and no ORDER BY after constant elimination of 'order'.
@@ -1280,10 +1361,19 @@ public:
List<Item> exec_const_order_group_cond;
SQL_SELECT *select; ///<created in optimisation phase
JOIN_TAB *return_tab; ///<used only for outer joins
- Item **ref_pointer_array; ///<used pointer reference for this select
- // Copy of above to be used with different lists
- Item **items0, **items1, **items2, **items3, **current_ref_pointer_array;
- uint ref_pointer_array_size; ///< size of above in bytes
+
+ /*
+ Used pointer reference for this select.
+ select_lex->ref_pointer_array contains five "slices" of the same length:
+ |========|========|========|========|========|
+ ref_ptrs items0 items1 items2 items3
+ */
+ Ref_ptr_array ref_ptrs;
+ // Copy of the initial slice above, to be used with different lists
+ Ref_ptr_array items0, items1, items2, items3;
+ // Used by rollup, to restore ref_ptrs after overwriting it.
+ Ref_ptr_array current_ref_ptrs;
+
const char *zero_result_cause; ///< not 0 if exec must return zero result
bool union_part; ///< this subselect is part of union
@@ -1310,20 +1400,12 @@ public:
/* SJM nests that are executed with SJ-Materialization strategy */
List<SJ_MATERIALIZATION_INFO> sjm_info_list;
- /*
- storage for caching buffers allocated during query execution.
- These buffers allocations need to be cached as the thread memory pool is
- cleared only at the end of the execution of the whole query and not caching
- allocations that occur in repetition at execution time will result in
- excessive memory usage.
- Note: make_simple_join always creates an execution plan that accesses
- a single table, thus it is sufficient to have a one-element array for
- table_reexec.
- */
- SORT_FIELD *sortorder; // make_unireg_sortorder()
- TABLE *table_reexec[1]; // make_simple_join()
- JOIN_TAB *join_tab_reexec; // make_simple_join()
- /* end of allocation caching storage */
+ /** TRUE <=> ref_pointer_array is set to items3. */
+ bool set_group_rpa;
+ /** Exec time only: TRUE <=> current group has been sent */
+ bool group_sent;
+
+ JOIN_TAB *sort_and_group_aggr_tab;
JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg,
select_result *result_arg)
@@ -1335,12 +1417,13 @@ public:
void init(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg,
select_result *result_arg)
{
- join_tab= join_tab_save= 0;
+ join_tab= 0;
table= 0;
table_count= 0;
top_join_tab_count= 0;
const_tables= 0;
const_table_map= 0;
+ aggr_tables= 0;
eliminated_tables= 0;
join_list= 0;
implicit_grouping= FALSE;
@@ -1350,25 +1433,21 @@ public:
send_records= 0;
found_records= 0;
fetch_limit= HA_POS_ERROR;
- join_examined_rows= 0;
- exec_tmp_table1= 0;
- exec_tmp_table2= 0;
- sortorder= 0;
- table_reexec[0]= 0;
- join_tab_reexec= 0;
thd= thd_arg;
sum_funcs= sum_funcs2= 0;
procedure= 0;
having= tmp_having= having_history= 0;
+ having_is_correlated= false;
+ group_list_for_estimates= 0;
select_options= select_options_arg;
result= result_arg;
lock= thd_arg->lock;
select_lex= 0; //for safety
- tmp_join= 0;
select_distinct= MY_TEST(select_options & SELECT_DISTINCT);
no_order= 0;
simple_order= 0;
simple_group= 0;
+ ordered_index_usage= ordered_index_void;
need_distinct= 0;
skip_sort_order= 0;
need_tmp= 0;
@@ -1376,8 +1455,11 @@ public:
error= 0;
select= 0;
return_tab= 0;
- ref_pointer_array= items0= items1= items2= items3= 0;
- ref_pointer_array_size= 0;
+ ref_ptrs.reset();
+ items0.reset();
+ items1.reset();
+ items2.reset();
+ items3.reset();
zero_result_cause= 0;
optimized= 0;
have_query_plan= QEP_NOT_PRESENT_YET;
@@ -1405,10 +1487,13 @@ public:
rollup.state= ROLLUP::STATE_NONE;
no_const_tables= FALSE;
+ first_select= sub_select;
+ set_group_rpa= false;
+ group_sent= 0;
+
outer_ref_cond= pseudo_bits_cond= NULL;
in_to_exists_where= NULL;
in_to_exists_having= NULL;
- pre_sort_join_tab= NULL;
emb_sjm_nest= NULL;
sjm_lookup_tables= 0;
@@ -1420,7 +1505,10 @@ public:
table_access_tabs= NULL;
}
- int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num,
+ /* True if the plan guarantees that it will return zero or one row */
+ bool only_const_tables() { return const_tables == table_count; }
+
+ int prepare(TABLE_LIST *tables, uint wind_num,
COND *conds, uint og_num, ORDER *order, bool skip_order_by,
ORDER *group, Item *having, ORDER *proc_param, SELECT_LEX *select,
SELECT_LEX_UNIT *unit);
@@ -1431,6 +1519,7 @@ public:
int init_execution();
void exec();
void exec_inner();
+ bool prepare_result(List<Item> **columns_list);
int destroy();
void restore_tmp();
bool alloc_func_list();
@@ -1440,16 +1529,42 @@ public:
bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
bool before_group_by, bool recompute= FALSE);
- inline void set_items_ref_array(Item **ptr)
+ /// Initializes a slice, see comments for ref_ptrs above.
+ Ref_ptr_array ref_ptr_array_slice(size_t slice_num)
+ {
+ size_t slice_sz= select_lex->ref_pointer_array.size() / 5U;
+ DBUG_ASSERT(select_lex->ref_pointer_array.size() % 5 == 0);
+ DBUG_ASSERT(slice_num < 5U);
+ return Ref_ptr_array(&select_lex->ref_pointer_array[slice_num * slice_sz],
+ slice_sz);
+ }
+
+ /**
+ Overwrites one slice with the contents of another slice.
+ In the normal case, dst and src have the same size().
+ However: the rollup slices may have smaller size than slice_sz.
+ */
+ void copy_ref_ptr_array(Ref_ptr_array dst_arr, Ref_ptr_array src_arr)
+ {
+ DBUG_ASSERT(dst_arr.size() >= src_arr.size());
+ void *dest= dst_arr.array();
+ const void *src= src_arr.array();
+ memcpy(dest, src, src_arr.size() * src_arr.element_size());
+ }
+
+ /// Overwrites 'ref_ptrs' and remembers the source as 'current'.
+ void set_items_ref_array(Ref_ptr_array src_arr)
{
- memcpy((char*) ref_pointer_array, (char*) ptr, ref_pointer_array_size);
- current_ref_pointer_array= ptr;
+ copy_ref_ptr_array(ref_ptrs, src_arr);
+ current_ref_ptrs= src_arr;
}
- inline void init_items_ref_array()
+
+ /// Initializes 'items0' and remembers that it is 'current'.
+ void init_items_ref_array()
{
- items0= ref_pointer_array + all_fields.elements;
- memcpy(items0, ref_pointer_array, ref_pointer_array_size);
- current_ref_pointer_array= items0;
+ items0= ref_ptr_array_slice(1);
+ copy_ref_ptr_array(items0, ref_ptrs);
+ current_ref_ptrs= items0;
}
bool rollup_init();
@@ -1458,18 +1573,10 @@ public:
Item_sum ***func);
int rollup_send_data(uint idx);
int rollup_write_data(uint idx, TABLE *table);
- /**
- Release memory and, if possible, the open tables held by this execution
- plan (and nested plans). It's used to release some tables before
- the end of execution in order to increase concurrency and reduce
- memory consumption.
- */
void join_free();
/** Cleanup this JOIN, possibly for reuse */
void cleanup(bool full);
void clear();
- bool save_join_tab();
- bool init_save_join_tab();
bool send_row_on_empty_set()
{
return (do_send_rows && implicit_grouping && !group_optimized_away &&
@@ -1488,6 +1595,8 @@ public:
return (table_map(1) << table_count) - 1;
}
void drop_unused_derived_keys();
+ bool get_best_combination();
+ bool add_sorting_to_table(JOIN_TAB *tab, ORDER *order);
inline void eval_select_list_used_tables();
/*
Return the table for which an index scan can be used to satisfy
@@ -1553,12 +1662,41 @@ public:
JOIN_TAB *first_breadth_first_execution_tab() { return join_tab; }
private:
/**
+ Create a temporary table to be used for processing DISTINCT/ORDER
+ BY/GROUP BY.
+
+ @note Will modify JOIN object wrt sort/group attributes
+
+ @param tab the JOIN_TAB object to attach created table to
+ @param tmp_table_fields List of items that will be used to define
+ column types of the table.
+ @param tmp_table_group Group key to use for temporary table, NULL if none.
+ @param save_sum_fields If true, do not replace Item_sum items in
+ @c tmp_fields list with Item_field items referring
+ to fields in temporary table.
+
+ @returns false on success, true on failure
+ */
+ bool create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *tmp_table_fields,
+ ORDER *tmp_table_group,
+ bool save_sum_fields,
+ bool distinct,
+ bool keep_row_ordermake);
+ /**
+ Optimize distinct when used on a subset of the tables.
+
+ E.g.: SELECT DISTINCT t1.a FROM t1,t2 WHERE t1.b=t2.b
+ In this case we can stop scanning t2 when we have found one t1.a
+ */
+ void optimize_distinct();
+
+ /**
TRUE if the query contains an aggregate function but has no GROUP
BY clause.
*/
bool implicit_grouping;
- bool make_simple_join(JOIN *join, TABLE *tmp_table);
void cleanup_item_list(List<Item> &items) const;
+ bool make_aggr_tables_info();
};
enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS};
@@ -1583,7 +1721,7 @@ extern const char *join_type_str[];
void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
List<Item> &fields, bool reset_with_sum_func);
bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
- Item **ref_pointer_array,
+ Ref_ptr_array ref_pointer_array,
List<Item> &new_list1, List<Item> &new_list2,
uint elements, List<Item> &fields);
void copy_fields(TMP_TABLE_PARAM *param);
@@ -1824,19 +1962,19 @@ int safe_index_read(JOIN_TAB *tab);
int get_quick_record(SQL_SELECT *select);
SORT_FIELD * make_unireg_sortorder(THD *thd, ORDER *order, uint *length,
SORT_FIELD *sortorder);
-int setup_order(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
+int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
List<Item> &fields, List <Item> &all_fields, ORDER *order);
-int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
+int setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
List<Item> &fields, List<Item> &all_fields, ORDER *order,
bool *hidden_group_fields);
bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
- Item **ref_pointer_array);
+ Ref_ptr_array ref_pointer_array);
int join_read_key2(THD *thd, struct st_join_table *tab, TABLE *table,
struct st_table_ref *table_ref);
bool handle_select(THD *thd, LEX *lex, select_result *result,
ulong setup_tables_done_option);
-bool mysql_select(THD *thd, Item ***rref_pointer_array,
+bool mysql_select(THD *thd,
TABLE_LIST *tables, uint wild_num, List<Item> &list,
COND *conds, uint og_num, ORDER *order, ORDER *group,
Item *having, ORDER *proc_param, ulonglong select_type,