diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item_cmpfunc.cc | 53 | ||||
-rw-r--r-- | sql/item_subselect.cc | 61 | ||||
-rw-r--r-- | sql/item_subselect.h | 30 | ||||
-rw-r--r-- | sql/opt_range.cc | 3 | ||||
-rw-r--r-- | sql/opt_subselect.cc | 611 | ||||
-rw-r--r-- | sql/opt_subselect.h | 10 | ||||
-rw-r--r-- | sql/sql_base.cc | 11 | ||||
-rw-r--r-- | sql/sql_class.cc | 22 | ||||
-rw-r--r-- | sql/sql_class.h | 12 | ||||
-rw-r--r-- | sql/sql_join_cache.cc | 61 | ||||
-rw-r--r-- | sql/sql_select.cc | 1118 | ||||
-rw-r--r-- | sql/sql_select.h | 87 | ||||
-rw-r--r-- | sql/sql_test.cc | 90 | ||||
-rw-r--r-- | sql/sql_union.cc | 17 | ||||
-rw-r--r-- | sql/table.cc | 6 | ||||
-rw-r--r-- | sql/table.h | 20 |
16 files changed, 1556 insertions, 656 deletions
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index f4a814b7aee..071c4bdeebb 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5740,25 +5740,11 @@ Item_field* Item_equal::get_first(Item_field *field) { /* It's a field from an materialized semi-join. We can substitute it only - for a field from the same semi-join. + for a field from the same semi-join. Find the first of such items. */ - JOIN_TAB *first; - JOIN *join= field_tab->join; - int tab_idx= field_tab - field_tab->join->join_tab; - - /* Find the first table of this semi-join nest */ - for (int i= tab_idx; i >= (int)join->const_tables; i--) - { - if (join->join_tab[i].table->map & emb_nest->sj_inner_tables) - first= join->join_tab + i; - else - // Found first tab that doesn't belong to current SJ. - break; - } - /* Find an item to substitute for. */ while ((item= it++)) { - if (item->field->table->reginfo.join_tab >= first) + if (item->field->table->pos_in_table_list->embedding == emb_nest) { /* If we found given field then return NULL to avoid unnecessary @@ -5770,32 +5756,27 @@ Item_field* Item_equal::get_first(Item_field *field) } else { -#if 0 /* The field is not in SJ-Materialization nest. We must return the first - field that's not embedded in a SJ-Materialization nest. - Example: suppose we have a join order: + field in the join order. The field may be inside a semi-join nest, i.e + a join order may look like this: SJ-Mat(it1 it2) ot1 ot2 - and equality ot2.col = ot1.col = it2.col - If we're looking for best substitute for 'ot2.col', we should pick ot1.col - and not it2.col, because when we run a join between ot1 and ot2 - execution of SJ-Mat(...) has already finished and we can't rely on the - value of it*.*. - psergey-fix-fix: ^^ THAT IS INCORRECT ^^. Pick the first, whatever that - is. + where we're looking what to substitute ot2.col for. In this case we must + still return it1.col, here's a proof why: + + First let's note that either it1.col or it2.col participates in + subquery's IN-equality. It can't be otherwise, because materialization is + only applicable to uncorrelated subqueries, so the only way we could + infer "it1.col=ot1.col" is from the IN-equality. Ok, so IN-eqality has + it1.col or it2.col on its inner side. it1.col is first such item in the + join order, so it's not possible for SJ-Mat to be + SJ-Materialization-lookup, it is SJ-Materialization-Scan. The scan part + of this strategy will unpack value of it1.col=it2.col into it1.col + (that's the first equal item inside the subquery), and we'll be able to + get it from there. qed. */ - while ((item= it++)) - { - TABLE_LIST *emb_nest= item->field->table->pos_in_table_list->embedding; - if (!emb_nest || !emb_nest->sj_mat_info || - !emb_nest->sj_mat_info->is_used) - { - return item; - } - } -#endif return fields.head(); } // Shouldn't get here. diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 0d0acdb29e4..aae1af8bd73 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -203,11 +203,7 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) { // all transformation is done (used by prepared statements) changed= 1; - inside_first_fix_fields= FALSE; - - - // all transformation is done (used by prepared statements) - changed= 1; + inside_first_fix_fields= FALSE; /* Substitute the current item with an Item_in_optimizer that was @@ -232,13 +228,13 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) if (!(*ref)->fixed) res= (*ref)->fix_fields(thd, ref); goto end; -//psergey-merge: done_first_fix_fields= FALSE; + } // Is it one field subselect? if (engine->cols() > max_columns) { my_error(ER_OPERAND_COLUMNS, MYF(0), 1); -//psergey-merge: done_first_fix_fields= FALSE; + goto end; } fix_length_and_dec(); @@ -256,6 +252,7 @@ bool Item_subselect::fix_fields(THD *thd_param, Item **ref) end: done_first_fix_fields= FALSE; + inside_first_fix_fields= FALSE; thd->where= save_where; return res; } @@ -485,6 +482,12 @@ bool Item_subselect::exec() return (res); } +int Item_subselect::optimize() +{ + int res; + res= engine->optimize(); + return res; +} /* Compute the IN predicate if the left operand's cache changed. @@ -741,9 +744,6 @@ Item_singlerow_subselect::select_transformer(JOIN *join) void Item_singlerow_subselect::store(uint i, Item *item) { row[i]->store(item); - //psergey-merge: can do without that: row[i]->cache_value(); - //psergey-backport-timours: ^ really, without that ^ - //psergey-try-merge-again: row[i]->cache_value(); } @@ -2105,7 +2105,7 @@ void Item_in_subselect::update_used_tables() @retval FALSE an execution method was chosen successfully */ -bool Item_in_subselect::setup_engine() +bool Item_in_subselect::setup_engine(bool dont_switch_arena) { subselect_hash_sj_engine *new_engine= NULL; bool res= FALSE; @@ -2120,14 +2120,15 @@ bool Item_in_subselect::setup_engine() old_engine= (subselect_single_select_engine*) engine; - if (arena->is_conventional()) + if (arena->is_conventional() || dont_switch_arena) arena= 0; else thd->set_n_backup_active_arena(arena, &backup); if (!(new_engine= new subselect_hash_sj_engine(thd, this, old_engine)) || - new_engine->init_permanent(unit->get_unit_column_types())) + new_engine->init_permanent(unit->get_unit_column_types(), + old_engine->get_identifier())) { Item_subselect::trans_res trans_res; /* @@ -3460,7 +3461,7 @@ subselect_hash_sj_engine::get_strategy_using_schema() bitmap_set_bit(&partial_match_key_parts, i); ++count_partial_match_columns; } - } + }; } /* If no column contains NULLs use regular hash index lookups. */ @@ -3659,6 +3660,7 @@ bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) reexecution. @param tmp_columns the items that produce the data for the temp table + @param subquery_id subquery's identifier (for temptable name) @details - Create a temporary table to store the result of the IN subquery. The @@ -3674,7 +3676,8 @@ bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) @retval FALSE otherwise */ -bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) +bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns, + uint subquery_id) { /* Options to create_tmp_table. */ ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS; @@ -3709,12 +3712,19 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) DBUG_RETURN(TRUE); } */ + char buf[32]; + uint len= my_snprintf(buf, sizeof(buf), "<subquery%d>", subquery_id); + char *name; + if (!(name= (char*)thd->alloc(len + 1))) + DBUG_RETURN(TRUE); + memcpy(name, buf, len+1); + if (!(result= new select_materialize_with_stats)) DBUG_RETURN(TRUE); if (((select_union*) result)->create_result_table( thd, tmp_columns, TRUE, tmp_create_options, - "materialized subselect", TRUE)) + name, TRUE)) DBUG_RETURN(TRUE); tmp_table= ((select_union*) result)->table; @@ -3795,7 +3805,7 @@ bool subselect_hash_sj_engine::make_semi_join_conds() if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST)))) DBUG_RETURN(TRUE); - tmp_table_ref->init_one_table("", "materialized subselect", TL_READ); + tmp_table_ref->init_one_table("", tmp_table->alias, TL_READ); tmp_table_ref->table= tmp_table; context= new Name_resolution_context; @@ -3885,8 +3895,9 @@ subselect_hash_sj_engine::make_unique_engine() cur_ref_buff + test(maybe_null), we could use that information instead. */ + cur_ref_buff + null_count, - null_count ? tab->ref.key_buff : 0, + null_count ? cur_ref_buff : 0, cur_key_part->length, tab->ref.items[i]); cur_ref_buff+= cur_key_part->store_length; } @@ -3973,6 +3984,17 @@ void subselect_hash_sj_engine::cleanup() } +int subselect_hash_sj_engine::optimize() +{ + int res= 0; + SELECT_LEX *save_select= thd->lex->current_select; + thd->lex->current_select= materialize_join->select_lex; + res= materialize_join->optimize(); + thd->lex->current_select= save_select; + + return res; +} + /** Execute a subquery IN predicate via materialization. @@ -4911,6 +4933,8 @@ bool subselect_rowid_merge_engine::partial_match() /* If there is a non-NULL key, it must be the first key in the keys array. */ DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); + /* The prioryty queue for keys must be empty. */ + DBUG_ASSERT(!pq.elements); /* All data accesses during execution are via handler::ha_rnd_pos() */ tmp_table->file->ha_rnd_init(0); @@ -5034,6 +5058,7 @@ bool subselect_rowid_merge_engine::partial_match() DBUG_ASSERT(FALSE); end: + queue_remove_all(&pq); tmp_table->file->ha_rnd_end(); return res; } diff --git a/sql/item_subselect.h b/sql/item_subselect.h index 2e7e117096e..dea8339dc59 100644 --- a/sql/item_subselect.h +++ b/sql/item_subselect.h @@ -46,15 +46,16 @@ public: < child_join->prepare < engine->prepare *ref= substitution; + substitution= NULL; < Item_subselect::fix_fields */ Item *substitution; public: /* unit of subquery */ st_select_lex_unit *unit; -protected: /* engine that perform execution of subselect (single select or union) */ subselect_engine *engine; +protected: /* old engine if engine was changed */ subselect_engine *old_engine; /* cache of used external tables */ @@ -139,6 +140,7 @@ public: bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void recalc_used_tables(st_select_lex *new_parent, bool after_pullout); + virtual int optimize(); virtual bool exec(); virtual void fix_length_and_dec(); table_map used_tables() const; @@ -334,7 +336,9 @@ protected: all JOIN in UNION */ Item *expr; +public: Item_in_optimizer *optimizer; +protected: bool was_null; bool abort_on_null; public: @@ -380,6 +384,16 @@ public: }; enum_exec_method exec_method; + /* + TRUE<=>this is a flattenable semi-join, false overwise. + */ + bool is_flattenable_semijoin; + + /* + Cost to populate the temporary table (set on if-needed basis). + */ + //double startup_cost; + bool *get_cond_guard(int i) { return pushed_cond_guards ? pushed_cond_guards + i : NULL; @@ -429,7 +443,7 @@ public: bool fix_fields(THD *thd, Item **ref); void fix_after_pullout(st_select_lex *new_parent, Item **ref); void update_used_tables(); - bool setup_engine(); + bool setup_engine(bool dont_switch_arena); bool init_left_expr_cache(); /* Inform 'this' that it was computed, and contains a valid result. */ void set_first_execution() { if (first_execution) first_execution= FALSE; } @@ -503,6 +517,7 @@ public: THD * get_thd() { return thd; } virtual int prepare()= 0; virtual void fix_length_and_dec(Item_cache** row)= 0; + virtual int optimize() { DBUG_ASSERT(0); return 0; } /* Execute the engine @@ -735,7 +750,7 @@ inline bool Item_subselect::is_uncacheable() const class subselect_hash_sj_engine : public subselect_engine { -protected: +public: /* The table into which the subquery is materialized. */ TABLE *tmp_table; /* TRUE if the subquery was materialized into a temp table. */ @@ -747,14 +762,16 @@ protected: of subselect_single_select_engine::[prepare | cols]. */ subselect_single_select_engine *materialize_engine; +protected: /* The engine used to compute the IN predicate. */ subselect_engine *lookup_engine; /* QEP to execute the subquery and materialize its result into a temporary table. Created during the first call to exec(). */ +public: JOIN *materialize_join; - +protected: /* Keyparts of the only non-NULL composite index in a rowid merge. */ MY_BITMAP non_null_key_parts; /* Keyparts of the single column indexes with NULL, one keypart per index. */ @@ -767,7 +784,9 @@ protected: IN results because index lookups sometimes match values that are actually not equal to the search key in SQL terms. */ +public: Item_cond_and *semi_join_conds; +protected: /* Possible execution strategies that can be used to compute hash semi-join.*/ enum exec_strategy { UNDEFINED, @@ -803,10 +822,11 @@ public: } ~subselect_hash_sj_engine(); - bool init_permanent(List<Item> *tmp_columns); + bool init_permanent(List<Item> *tmp_columns, uint subquery_id); bool init_runtime(); void cleanup(); int prepare() { return 0; } /* Override virtual function in base class. */ + int optimize(); int exec(); virtual void print(String *str, enum_query_type query_type); uint cols() diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 0d8a0a6d73e..fe7bf3134ef 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2287,7 +2287,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, quick=0; needed_reg.clear_all(); quick_keys.clear_all(); - if (keys_to_use.is_clear_all()) + DBUG_ASSERT(!head->is_filled_at_execution()); + if (keys_to_use.is_clear_all() || head->is_filled_at_execution()) DBUG_RETURN(0); records= head->file->stats.records; if (!records) diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 1855224440c..0add6db447a 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -2,7 +2,7 @@ @file @brief - Subquery optimization code here. + Semi-join subquery optimizations code */ @@ -16,7 +16,162 @@ #include <my_bit.h> -// Our own: +/* + This file contains optimizations for semi-join subqueries. + + Contents + -------- + 1. What is a semi-join subquery + 2. General idea about semi-join execution + 2.1 Correlated vs uncorrelated semi-joins + 2.2 Mergeable vs non-mergeable semi-joins + 3. Code-level view of semi-join processing + 3.1 Conversion + 3.1.1 Merged semi-join TABLE_LIST object + 3.1.2 Non-merged semi-join data structure + 3.2 Semi-joins and query optimization + 3.2.1 Non-merged semi-joins and join optimization + 3.2.2 Merged semi-joins and join optimization + 3.3 Semi-joins and query execution + + 1. What is a semi-join subquery + ------------------------------- + We use this definition of semi-join: + + outer_tbl SEMI JOIN inner_tbl ON cond = {set of outer_tbl.row such that + exist inner_tbl.row, for which + cond(outer_tbl.row,inner_tbl.row) + is satisfied} + + That is, semi-join operation is similar to inner join operation, with + exception that we don't care how many matches a row from outer_tbl has in + inner_tbl. + + In SQL terms: a semi-join subquery is an IN subquery that is an AND-part of + the WHERE/ON clause. + + 2. General idea about semi-join execution + ----------------------------------------- + We can execute semi-join in a way similar to inner join, with exception that + we need to somehow ensure that we do not generate record combinations that + differ only in rows of inner tables. + There is a number of different ways to achieve this property, implemented by + a number of semi-join execution strategies. + Some strategies can handle any semi-joins, other can be applied only to + semi-joins that have certain properties that are described below: + + 2.1 Correlated vs uncorrelated semi-joins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Uncorrelated semi-joins are special in the respect that they allow to + - execute the subquery (possible as it's uncorrelated) + - somehow make sure that generated set does not have duplicates + - perform an inner join with outer tables. + + or, rephrasing in SQL form: + + SELECT ... FROM ot WHERE ot.col IN (SELECT it.col FROM it WHERE uncorr_cond) + -> + SELECT ... FROM ot JOIN (SELECT DISTINCT it.col FROM it WHERE uncorr_cond) + + 2.2 Mergeable vs non-mergeable semi-joins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Semi-join operation has some degree of commutability with inner join + operation: we can join subquery's tables with ouside table(s) and eliminate + duplicate record combination after that: + + ot1 JOIN ot2 SEMI_JOIN{it1,it2} (it1 JOIN it2) ON sjcond(ot2,it*) -> + | + +-------------------------------+ + v + ot1 SEMI_JOIN{it1,it2} (it1 JOIN it2 JOIN ot2) ON sjcond(ot2,it*) + + In order for this to work, subquery's top-level operation must be join, and + grouping or ordering with limit (grouping or ordering with limit are not + commutative with duplicate removal). In other words, the conversion is + possible when the subquery doesn't have GROUP BY clause, any aggregate + functions*, or ORDER BY ... LIMIT clause. + + Definitions: + - Subquery whose top-level operation is a join is called *mergeable semi-join* + - All other kinds of semi-join subqueries are considered non-mergeable. + + *- this requirement is actually too strong, but its exceptions are too + complicated to be considered here. + + 3. Code-level view of semi-join processing + ------------------------------------------ + + 3.1 Conversion and pre-optimization data structures + --------------------------------------------------- + * When doing JOIN::prepare for the subquery, we detect that it can be + converted into a semi-join and register it in parent_join->sj_subselects + + * At the start of parent_join->optimize(), the predicate is converted into + a semi-join node. A semi-join node is a TABLE_LIST object that is linked + somewhere in parent_join->join_list (either it is just present there, or + it is a descendant of some of its members). + + There are two kinds of semi-joins: + - Merged semi-joins + - Non-merged semi-joins + + 3.1.1 Merged semi-join TABLE_LIST object + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Merged semi-join object is a TABLE_LIST that contains a sub-join of + subquery tables and the semi-join ON expression (in this respect it is + very similar to nested outer join representation) + Merged semi-join represents this SQL: + + ... SEMI JOIN (inner_tbl1 JOIN ... JOIN inner_tbl_n) ON sj_on_expr + + Semi-join objects of this kind have TABLE_LIST::sj_subq_pred set. + + 3.1.2 Non-merged semi-join data structure + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Non-merged semi-join object is a leaf TABLE_LIST object that has a subquery + that produces rows. It is similar to a base table and represents this SQL: + + ... SEMI_JOIN (SELECT non_mergeable_select) ON sj_on_expr + + Subquery items that were converted into semi-joins are removed from the WHERE + clause. (They do remain in PS-saved WHERE clause, and they replace themselves + with Item_int(1) on subsequent re-executions). + + 3.2 Semi-joins and join optimization + ------------------------------------ + + 3.2.1 Non-merged semi-joins and join optimization + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + For join optimization purposes, non-merged semi-join nests are similar to + base tables - they've got one JOIN_TAB, which can be accessed with one of + two methods: + - full table scan (representing SJ-Materialization-Scan strategy) + - eq_ref-like table lookup (representing SJ-Materialization-Lookup) + + Unlike regular base tables, non-merged semi-joins have: + - non-zero JOIN_TAB::startup_cost, and + - join_tab->table->is_filled_at_execution()==TRUE, which means one + cannot do const table detection or range analysis or other table data- + dependent inferences + // instead, get_delayed_table_estimates() runs optimization on the nest so that + // we get an idea about temptable size + + 3.2.2 Merged semi-joins and join optimization + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + - optimize_semijoin_nests() does pre-optimization + - during join optimization, the join has one JOIN_TAB (or is it POSITION?) + array, and suffix-based detection is used, see advance_sj_state() + - after join optimization is done, get_best_combination() switches + the data-structure to prefix-based, multiple JOIN_TAB ranges format. + + 3.3 Semi-joins and query execution + ---------------------------------- + * Join executor has hooks for all semi-join strategies. + TODO elaborate. + +*/ + + static bool subquery_types_allow_materialization(Item_in_subselect *in_subs); static bool replace_where_subcondition(JOIN *join, Item **expr, @@ -25,6 +180,8 @@ static bool replace_where_subcondition(JOIN *join, Item **expr, static int subq_sj_candidate_cmp(Item_in_subselect* const *el1, Item_in_subselect* const *el2); static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, bool *remove); static TABLE_LIST *alloc_join_nest(THD *thd); static void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); @@ -50,17 +207,22 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where); /* Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them + SYNOPSIS + check_and_do_in_subquery_rewrites() + join Subquery's join + DESCRIPTION Check if we need to do - - subquery->semi-join rewrite + - subquery -> mergeable semi-join rewrite - if the subquery can be handled with materialization - 'substitution' rewrite for table-less subqueries like "(select 1)" - - and mark appropriately + - IN->EXISTS rewrite + and, depending on the rewrite, either do it, or record it to be done at a + later phase. RETURN - 0 - OK - -1 - Some sort of query error + 0 - OK + Other - Some sort of query error */ int check_and_do_in_subquery_rewrites(JOIN *join) @@ -166,6 +328,7 @@ int check_and_do_in_subquery_rewrites(JOIN *join) (void)subquery_types_allow_materialization(in_subs); in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + in_subs->is_flattenable_semijoin= TRUE; /* Register the subquery for further processing in flatten_subqueries() */ select_lex-> @@ -220,10 +383,24 @@ int check_and_do_in_subquery_rewrites(JOIN *join) (in_subs->is_top_level_item() || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4 - !in_subs->is_correlated && // 5 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 + !in_subs->is_correlated) // 5 { + if (in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) in_subs->exec_method= Item_in_subselect::MATERIALIZATION; + + /* + If the subquery is an AND-part of WHERE register for being processed + with jtbm strategy + */ + if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && + thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1 && + optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN)) + { + in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + in_subs->is_flattenable_semijoin= FALSE; + select_lex->outer_select()-> + join->sj_subselects.append(thd->mem_root, in_subs); + } } Item_subselect::trans_res trans_res; @@ -339,6 +516,69 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs) /* + Finalize IN->EXISTS conversion in case we couldn't use materialization. + + DESCRIPTION Invoke the IN->EXISTS converter + Replace the Item_in_subselect with its wrapper Item_in_optimizer in WHERE. + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static +bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *item) +{ + DBUG_ENTER("make_in_exists_conversion"); + JOIN *child_join= item->unit->first_select()->join; + Item_subselect::trans_res res; + item->changed= 0; + item->fixed= 0; + + SELECT_LEX *save_select_lex= thd->lex->current_select; + thd->lex->current_select= item->unit->first_select(); + + res= item->select_transformer(child_join); + + thd->lex->current_select= save_select_lex; + + if (res == Item_subselect::RES_ERROR) + DBUG_RETURN(TRUE); + + item->changed= 1; + item->fixed= 1; + + Item *substitute= item->substitution; + bool do_fix_fields= !item->substitution->fixed; + /* + The Item_subselect has already been wrapped with Item_in_optimizer, so we + should search for item->optimizer, not 'item'. + */ + Item *replace_me= item->optimizer; + DBUG_ASSERT(replace_me==substitute); + + Item **tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &(item->emb_on_expr_nest->on_expr); + if (replace_where_subcondition(join, tree, replace_me, substitute, + do_fix_fields)) + DBUG_RETURN(TRUE); + item->substitution= NULL; + + if (!thd->stmt_arena->is_conventional()) + { + tree= (item->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->select_lex->prep_where : + &(item->emb_on_expr_nest->prep_on_expr); + + if (replace_where_subcondition(join, tree, replace_me, substitute, + FALSE)) + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* Convert semi-join subquery predicates into semi-join join nests SYNOPSIS @@ -445,25 +685,41 @@ bool convert_join_subqueries_to_semijoins(JOIN *join) // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES /* Replace all subqueries to be flattened with Item_int(1) */ arena= thd->activate_stmt_arena_if_needed(&backup); - for (in_subq= join->sj_subselects.front(); - in_subq != in_subq_end && - join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; - in_subq++) - { - Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? - &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, new Item_int(1), - FALSE)) - DBUG_RETURN(TRUE); /* purecov: inspected */ - } for (in_subq= join->sj_subselects.front(); in_subq != in_subq_end && join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; in_subq++) { - if (convert_subq_to_sj(join, *in_subq)) - DBUG_RETURN(TRUE); + bool remove_item= TRUE; + if ((*in_subq)->is_flattenable_semijoin) + { + if (convert_subq_to_sj(join, *in_subq)) + DBUG_RETURN(TRUE); + } + else + { + if (convert_subq_to_jtbm(join, *in_subq, &remove_item)) + DBUG_RETURN(TRUE); + } + if (remove_item) + { + Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->is_flattenable_semijoin) + { + replace_me= (*in_subq)->optimizer; + } + if (replace_where_subcondition(join, tree, replace_me, new Item_int(1), + FALSE)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + } } skip_conversion: /* @@ -494,7 +750,19 @@ skip_conversion: bool do_fix_fields= !(*in_subq)->substitution->fixed; Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(join, tree, *in_subq, substitute, + + Item *replace_me= *in_subq; + /* + JTBM: the subquery was already mapped with Item_in_optimizer, so we + should search for that, not for original Item_in_subselect. + TODO: what about delaying that rewrite until here? + */ + if (!(*in_subq)->is_flattenable_semijoin) + { + replace_me= (*in_subq)->optimizer; + } + + if (replace_where_subcondition(join, tree, replace_me, substitute, do_fix_fields)) DBUG_RETURN(TRUE); (*in_subq)->substitution= NULL; @@ -505,7 +773,7 @@ skip_conversion: &join->select_lex->prep_where : &((*in_subq)->emb_on_expr_nest->prep_on_expr); - if (replace_where_subcondition(join, tree, *in_subq, substitute, + if (replace_where_subcondition(join, tree, replace_me, substitute, FALSE)) DBUG_RETURN(TRUE); } @@ -517,6 +785,61 @@ skip_conversion: DBUG_RETURN(FALSE); } + +/* + Get #output_rows and scan_time estimates for a "delayed" table. + + SYNOPSIS + get_delayed_table_estimates() + table IN Table to get estimates for + out_rows OUT E(#rows in the table) + scan_time OUT E(scan_time). + startup_cost OUT cost to populate the table. + + DESCRIPTION + Get #output_rows and scan_time estimates for a "delayed" table. By + "delayed" here we mean that the table is filled at the start of query + execution. This means that the optimizer can't use table statistics to + get #rows estimate for it, it has to call this function instead. + + This function is expected to make different actions depending on the nature + of the table. At the moment there is only one kind of delayed tables, + non-flattenable semi-joins. +*/ + +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost) +{ + Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect; + item->optimize(); + + DBUG_ASSERT(item->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)item->engine); + JOIN *join= hash_sj_engine->materialize_join; + + double rows= 1; + double read_time= 0.0; + + /* Calculate #rows and cost of join execution */ + for (uint i= join->const_tables; i < join->tables; i++) + { + rows *= join->best_positions[i].records_read; + read_time += join->best_positions[i].read_time; + } + *out_rows= rows; + *startup_cost= read_time; + /* Calculate cost of scanning the temptable */ + double data_size= rows * hash_sj_engine->tmp_table->s->reclength; + /* Do like in handler::read_time */ + *scan_time= data_size/IO_SIZE + 2; +} + + /** @brief Replaces an expression destructively inside the expression tree of the WHERE clase. @@ -534,6 +857,7 @@ skip_conversion: @return <code>true</code> if there was an error, <code>false</code> if successful. */ + static bool replace_where_subcondition(JOIN *join, Item **expr, Item *old_cond, Item *new_cond, bool do_fix_fields) @@ -769,8 +1093,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) /* 3. Remove the original subquery predicate from the WHERE/ON */ // The subqueries were replaced for Item_int(1) earlier - subq_pred->exec_method= - Item_in_subselect::SEMI_JOIN; // for subsequent executions + subq_pred->exec_method= Item_in_subselect::SEMI_JOIN; // for subsequent executions /*TODO: also reset the 'with_subselect' there. */ /* n. Adjust the parent_join->tables counter */ @@ -887,6 +1210,118 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) DBUG_RETURN(FALSE); } + +/* + Convert subquery predicate into non-mergeable semi-join nest. + + TODO: + why does this do IN-EXISTS conversion? Can't we unify it with mergeable + semi-joins? currently, convert_subq_to_sj() cannot fail to convert (unless + fatal errors) + + + RETURN + FALSE - Ok + TRUE - Fatal error +*/ + +static bool convert_subq_to_jtbm(JOIN *parent_join, + Item_in_subselect *subq_pred, + bool *remove_item) +{ + SELECT_LEX *parent_lex= parent_join->select_lex; + List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list; + TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins + TABLE_LIST *tl; + DBUG_ENTER("convert_subq_to_jtbm"); + + if (subq_pred->setup_engine(TRUE)) + DBUG_RETURN(TRUE); + + if (subq_pred->engine->engine_type() != subselect_engine::HASH_SJ_ENGINE) + { + *remove_item= FALSE; + make_in_exists_conversion(parent_join->thd, parent_join, subq_pred); + DBUG_RETURN(FALSE); + } + *remove_item= TRUE; + + TABLE_LIST *jtbm; + char *tbl_alias; + const char alias_mask[]="<subquery%d>"; + if (!(tbl_alias= (char*)parent_join->thd->calloc(sizeof(alias_mask)+5)) || + !(jtbm= alloc_join_nest(parent_join->thd))) //todo: this is not a join nest! + { + DBUG_RETURN(TRUE); + } + + jtbm->join_list= emb_join_list; + jtbm->embedding= emb_tbl_nest; + jtbm->jtbm_subselect= subq_pred; + jtbm->nested_join= NULL; + + /* Nests do not participate in those 'chains', so: */ + /* jtbm->next_leaf= jtbm->next_local= jtbm->next_global == NULL*/ + emb_join_list->push_back(jtbm); + + /* + Inject the jtbm table into TABLE_LIST::next_leaf list, so that + make_join_statistics() and co. can find it. + */ + for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ; + tl->next_leaf= jtbm; + + /* + Same as above for TABLE_LIST::next_local chain + (a theory: a next_local chain always starts with ::leaf_tables + because view's tables are inserted after the view) + */ + for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ; + tl->next_local= jtbm; + + /* A theory: no need to re-connect the next_global chain */ + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)subq_pred->engine); + jtbm->table= hash_sj_engine->tmp_table; + + jtbm->table->tablenr= parent_join->tables; + jtbm->table->map= table_map(1) << (parent_join->tables); + + parent_join->tables++; + + Item *conds= hash_sj_engine->semi_join_conds; + conds->fix_after_pullout(parent_lex, &conds); + + DBUG_EXECUTE("where", print_where(conds,"SJ-EXPR", QT_ORDINARY);); + + my_snprintf(tbl_alias, sizeof(alias_mask)+5, alias_mask, + hash_sj_engine->materialize_join->select_lex->select_number); + jtbm->alias= tbl_alias; + + /* Inject sj_on_expr into the parent's WHERE or ON */ + if (emb_tbl_nest) + { + DBUG_ASSERT(0); + /*emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, + sj_nest->sj_on_expr); + emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr); + */ + } + else + { + /* Inject into the WHERE */ + parent_join->conds= and_items(parent_join->conds, conds); + parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds); + parent_join->select_lex->where= parent_join->conds; + } + + /* Don't unlink the child subselect, as the subquery will be used. */ + + DBUG_RETURN(FALSE); +} + + static TABLE_LIST *alloc_join_nest(THD *thd) { TABLE_LIST *tbl; @@ -1245,6 +1680,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) DBUG_RETURN(FALSE); } + /* Get estimated record length for semi-join materialization temptable @@ -1301,7 +1737,7 @@ static uint get_tmp_table_rec_length(List<Item> &items) return len; } -//psergey-todo: is the below a kind of table elimination?? + /* Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate @@ -1318,6 +1754,8 @@ static uint get_tmp_table_rec_length(List<Item> &items) Check again if it is feasible to factor common parts with constant table search + Also check if it's feasible to factor common parts with table elimination + RETURN TRUE - There exists an eq_ref(outer-tables) candidate FALSE - Otherwise @@ -1368,6 +1806,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) return FALSE; } + /* Do semi-join optimization step after we've added a new tab to join prefix @@ -2194,6 +2633,9 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) } } +enum_nested_loop_state +end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); + /* Setup semi-join materialization strategy for one semi-join nest @@ -2215,10 +2657,11 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) TRUE Error */ -bool setup_sj_materialization(JOIN_TAB *tab) +bool setup_sj_materialization(JOIN_TAB *sjm_tab) { uint i; DBUG_ENTER("setup_sj_materialization"); + JOIN_TAB *tab= sjm_tab->bush_children->start; TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding; SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info; THD *thd= tab->join->thd; @@ -2246,10 +2689,13 @@ bool setup_sj_materialization(JOIN_TAB *tab) DBUG_RETURN(TRUE); /* purecov: inspected */ sjm->table->file->extra(HA_EXTRA_WRITE_CACHE); sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + tab->join->sj_tmp_tables.push_back(sjm->table); tab->join->sjm_info_list.push_back(sjm); sjm->materialized= FALSE; + sjm_tab->table= sjm->table; + if (!sjm->is_sj_scan) { KEY *tmp_key; /* The only index on the temporary table. */ @@ -2262,8 +2708,7 @@ bool setup_sj_materialization(JOIN_TAB *tab) temptable. */ TABLE_REF *tab_ref; - if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ + tab_ref= &sjm_tab->ref; tab_ref->key= 0; /* The only temp table index. */ tab_ref->key_length= tmp_key->key_length; if (!(tab_ref->key_buff= @@ -2296,11 +2741,21 @@ bool setup_sj_materialization(JOIN_TAB *tab) use that information instead. */ cur_ref_buff + null_count, - null_count ? tab_ref->key_buff : 0, + null_count ? cur_ref_buff : 0, cur_key_part->length, tab_ref->items[i]); cur_ref_buff+= cur_key_part->store_length; } *ref_key= NULL; /* End marker. */ + + /* + We don't ever have guarded conditions for SJM tables, but code at SQL + layer depends on cond_guards array being alloced. + */ + if (!(tab_ref->cond_guards= (bool**) thd->calloc(sizeof(uint*)*tmp_key_parts))) + { + DBUG_RETURN(TRUE); + } + tab_ref->key_err= 1; tab_ref->key_parts= tmp_key_parts; sjm->tab_ref= tab_ref; @@ -2320,6 +2775,8 @@ bool setup_sj_materialization(JOIN_TAB *tab) if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm, emb_sj_nest->sj_subq_pred))) DBUG_RETURN(TRUE); /* purecov: inspected */ + sjm_tab->type= JT_EQ_REF; + sjm_tab->select_cond= sjm->in_equality; } else { @@ -2372,9 +2829,11 @@ bool setup_sj_materialization(JOIN_TAB *tab) then substitute_for_best_equal_field() will change the conditions according to the join order: - it1 - it2 it1.col=it2.col - ot cond(it1.col) + table | attached condition + ------+-------------------- + it1 | + it2 | it1.col=it2.col + ot | cond(it1.col) although we've originally had "SELECT it2.col", conditions attached to subsequent outer tables will refer to it1.col, so SJM-Scan will @@ -2403,8 +2862,18 @@ bool setup_sj_materialization(JOIN_TAB *tab) /* The write_set for source tables must be set up to allow the copying */ bitmap_set_bit(copy_to->table->write_set, copy_to->field_index); } + sjm_tab->type= JT_ALL; + + /* Initialize full scan */ + sjm_tab->read_first_record= join_read_record_no_init; + sjm_tab->read_record.copy_field= sjm->copy_field; + sjm_tab->read_record.copy_field_end= sjm->copy_field + + sjm->sjm_table_cols.elements; + sjm_tab->read_record.read_record= rr_sequential_and_unpack; } + sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; + DBUG_RETURN(FALSE); } @@ -3504,3 +3973,77 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where) } +/* + Join tab execution startup function. + + SYNOPSIS + join_tab_execution_startup() + tab Join tab to perform startup actions for + + DESCRIPTION + Join tab execution startup function. This is different from + tab->read_first_record in the regard that this has actions that are to be + done once per join execution. + + Currently there are only two possible startup functions, so we have them + both here inside if (...) branches. In future we could switch to function + pointers. + + RETURN + NESTED_LOOP_OK - OK + NESTED_LOOP_ERROR| NESTED_LOOP_KILLED - Error, abort the join execution +*/ + +enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab) +{ + Item_in_subselect *in_subs; + DBUG_ENTER("join_tab_execution_startup"); + + if (tab->table->pos_in_table_list && + (in_subs= tab->table->pos_in_table_list->jtbm_subselect)) + { + /* It's a non-merged SJM nest */ + DBUG_ASSERT(in_subs->engine->engine_type() == + subselect_engine::HASH_SJ_ENGINE); + + subselect_hash_sj_engine *hash_sj_engine= + ((subselect_hash_sj_engine*)in_subs->engine); + if (!hash_sj_engine->is_materialized) + { + hash_sj_engine->materialize_join->exec(); + hash_sj_engine->is_materialized= TRUE; + + if (hash_sj_engine->materialize_join->error || tab->join->thd->is_fatal_error) + DBUG_RETURN(NESTED_LOOP_ERROR); + } + } + else if (tab->bush_children) + { + /* It's a merged SJM nest */ + enum_nested_loop_state rc; + JOIN *join= tab->join; + SJ_MATERIALIZATION_INFO *sjm= tab->bush_children->start->emb_sj_nest->sj_mat_info; + JOIN_TAB *join_tab= tab->bush_children->start; + JOIN_TAB *save_return_tab= join->return_tab; + + if (!sjm->materialized) + { + /* + Now run the join for the inner tables. The first call is to run the + join, the second one is to signal EOF (this is essential for some + join strategies, e.g. it will make join buffering flush the records) + */ + if ((rc= sub_select(join, join_tab, FALSE/* no EOF */)) < 0 || + (rc= sub_select(join, join_tab, TRUE/* now EOF */)) < 0) + { + join->return_tab= save_return_tab; + DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ + } + join->return_tab= save_return_tab; + sjm->materialized= TRUE; + } + } + + DBUG_RETURN(NESTED_LOOP_OK); +} + diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index e9b93085aea..539fa806ed0 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -1,4 +1,6 @@ -/* */ +/* + Semi-join subquery optimization code definitions +*/ #ifdef USE_PRAGMA_INTERFACE #pragma interface /* gcc class implementation */ @@ -365,4 +367,10 @@ int clear_sj_tmp_tables(JOIN *join); int rewrite_to_index_subquery_engine(JOIN *join); +void get_delayed_table_estimates(TABLE *table, + ha_rows *out_rows, + double *scan_time, + double *startup_cost); + +enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab); diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 89a3ce7a5fc..71c07fda852 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -7682,6 +7682,17 @@ bool setup_tables(THD *thd, Name_resolution_context *context, if (res) DBUG_RETURN(1); } + if (table_list->jtbm_subselect) + { + Item *item= table_list->jtbm_subselect; + if (item->fix_fields(thd, &item)) + { + my_error(ER_TOO_MANY_TABLES,MYF(0),MAX_TABLES); + DBUG_RETURN(1); + } + DBUG_ASSERT(item == table_list->jtbm_subselect); + table_list->jtbm_subselect->setup_engine(FALSE); + } } /* Precompute and store the row types of NATURAL/USING joins. */ diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 9cc66d4d8be..70fe97f00e1 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -2974,8 +2974,7 @@ bool select_dumpvar::send_eof() } -bool -select_materialize_with_stats:: +bool select_materialize_with_stats:: create_result_table(THD *thd_arg, List<Item> *column_types, bool is_union_distinct, ulonglong options, const char *table_alias, bool bit_fields_as_long) @@ -2994,14 +2993,29 @@ create_result_table(THD *thd_arg, List<Item> *column_types, if (!stat) return TRUE; - cleanup(); - + reset(); table->file->extra(HA_EXTRA_WRITE_CACHE); table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); return FALSE; } +void select_materialize_with_stats::reset() +{ + memset(col_stat, 0, table->s->fields * sizeof(Column_statistics)); + max_nulls_in_row= 0; + count_rows= 0; +} + + +void select_materialize_with_stats::cleanup() +{ + reset(); + select_union::cleanup(); +} + + + /** Override select_union::send_data to analyze each row for NULLs and to update null_statistics before sending data to the client. diff --git a/sql/sql_class.h b/sql/sql_class.h index 932f2234766..a7ec4b107d8 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -2830,7 +2830,7 @@ public: bool send_data(List<Item> &items); bool send_eof(); bool flush(); - + void cleanup(); virtual bool create_result_table(THD *thd, List<Item> *column_types, bool is_distinct, ulonglong options, const char *alias, bool bit_fields_as_long); @@ -2893,6 +2893,9 @@ protected: */ ha_rows count_rows; +protected: + void reset(); + public: select_materialize_with_stats() {} virtual bool create_result_table(THD *thd, List<Item> *column_types, @@ -2900,12 +2903,7 @@ public: const char *alias, bool bit_fields_as_long); bool init_result_table(ulonglong select_options); bool send_data(List<Item> &items); - void cleanup() - { - memset(col_stat, 0, table->s->fields * sizeof(Column_statistics)); - max_nulls_in_row= 0; - count_rows= 0; - } + void cleanup(); ha_rows get_null_count_of_col(uint idx) { DBUG_ASSERT(idx < table->s->fields); diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index e9923c3c983..c07bcf64d70 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -33,7 +33,6 @@ #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1) - /***************************************************************************** * Join cache module ******************************************************************************/ @@ -138,6 +137,7 @@ uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, return len; } +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots); /* Determine different counters of fields associated with a record in the cache @@ -158,10 +158,31 @@ uint add_table_data_fields_to_join_cache(JOIN_TAB *tab, void JOIN_CACHE::calc_record_fields() { - JOIN_TAB *tab = prev_cache ? prev_cache->join_tab : - join->join_tab+join->const_tables; - tables= join_tab-tab; - + JOIN_TAB *tab; + if (prev_cache) + tab= prev_cache->join_tab; + else + { + if (join_tab->bush_root_tab) + { + /* + If the tab we're attached to is inside an SJM-nest, start from the + first tab in that SJM nest + */ + tab= join_tab->bush_root_tab->bush_children->start; + } + else + { + /* + The tab we're attached to is not inside an SJM-nest. Start from the + first non-const table. + */ + tab= join->join_tab + join->const_tables; + } + } + start_tab= tab; + //tables= join_tab-tab; + //tables= 0; fields= 0; blobs= 0; flag_fields= 0; @@ -169,7 +190,7 @@ void JOIN_CACHE::calc_record_fields() data_field_ptr_count= 0; referenced_fields= 0; - for ( ; tab < join_tab ; tab++) + for ( ; tab != join_tab ; tab= next_linear_tab(join, tab, TRUE)) { calc_used_field_length(join->thd, tab); flag_fields+= test(tab->used_null_fields || tab->used_uneven_bit_fields); @@ -178,6 +199,7 @@ void JOIN_CACHE::calc_record_fields() blobs+= tab->used_blobs; fields+= tab->check_rowid_field(); + //tables++; } if ((with_match_flag= join_tab->use_match_flag())) flag_fields++; @@ -272,7 +294,8 @@ void JOIN_CACHE::create_flag_fields() ©); /* Create fields for all null bitmaps and null row flags that are needed */ - for (tab= join_tab-tables; tab < join_tab; tab++) + //for (tab= join_tab-tables; tab < join_tab; tab++) + for (tab= start_tab; tab != join_tab; tab= next_linear_tab(join, tab, TRUE)) { TABLE *table= tab->table; @@ -337,7 +360,8 @@ void JOIN_CACHE:: create_remaining_fields(bool all_read_fields) CACHE_FIELD *copy= field_descr+flag_fields+data_field_count; CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count; - for (tab= join_tab-tables; tab < join_tab; tab++) + for (tab= start_tab; tab != join_tab; tab= next_linear_tab(join, tab, TRUE)) + //for (tab= join_tab-tables; tab < join_tab; tab++) { MY_BITMAP *rem_field_set; TABLE *table= tab->table; @@ -558,7 +582,9 @@ int JOIN_CACHE_BKA::init() of the counting 'in local_key_arg_fields' and 'external_key_arg_fields' respectively. */ - for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++) + // for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++) + for (tab= cache->start_tab; tab != cache->join_tab; tab= + next_linear_tab(cache->join, tab, TRUE)) { uint key_args; bitmap_clear_all(&tab->table->tmp_set); @@ -598,7 +624,9 @@ int JOIN_CACHE_BKA::init() while (ext_key_arg_cnt) { cache= cache->prev_cache; - for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++) + for (tab= cache->start_tab; tab != cache->join_tab; tab= + next_linear_tab(cache->join, tab, TRUE)) + //for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++) { CACHE_FIELD *copy_end; MY_BITMAP *key_read_set= &tab->table->tmp_set; @@ -641,7 +669,8 @@ int JOIN_CACHE_BKA::init() /* Now create local fields that are used to build ref for this key access */ copy= field_descr+flag_fields; - for (tab= join_tab-tables; tab < join_tab ; tab++) + //for (tab= join_tab-tables; tab < join_tab ; tab++) + for (tab= start_tab; tab != join_tab; tab= next_linear_tab(join, tab, TRUE)) { length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set, &data_field_count, ©, @@ -1771,13 +1800,18 @@ enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last) join_tab->select->quick= 0; } - for (tab= join->join_tab; tab != join_tab ; tab++) + //for (tab= join->join_tab; tab != join_tab ; tab++) + for (tab= start_tab ; tab != join_tab ; tab= next_linear_tab(join, tab, TRUE)) { tab->status= tab->table->status; tab->table->status= 0; } /* Start retrieving all records of the joined table */ + + if ((rc= join_tab_execution_startup(join_tab)) < 0) + goto finish; + if ((error= join_init_read_record(join_tab))) { rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR; @@ -1841,7 +1875,8 @@ enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last) if (error > 0) // Fatal error rc= NESTED_LOOP_ERROR; finish: - for (tab= join->join_tab; tab != join_tab ; tab++) + //for (tab= join->join_tab; tab != join_tab ; tab++) + for (tab= start_tab ; tab != join_tab ; tab= next_linear_tab(join, tab, TRUE)) tab->table->status= tab->status; return rc; } diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 731f4a5901f..3dd325380e4 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -97,7 +97,7 @@ static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item); static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after); static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables); static void update_depend_map(JOIN *join); -static void update_depend_map(JOIN *join, ORDER *order); +static void update_depend_map_for_order(JOIN *join, ORDER *order); static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond, bool change_list, bool *simple_order); static int return_zero_rows(JOIN *join, select_result *res,TABLE_LIST *tables, @@ -233,8 +233,6 @@ static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab); void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg, double *record_count_arg); static uint make_join_orderinfo(JOIN *join); -static int -join_read_record_no_init(JOIN_TAB *tab); Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, bool *inherited_fl); @@ -1022,15 +1020,26 @@ JOIN::optimize() /* Permorm the the optimization on fields evaluation mentioned above for all on expressions. - */ - for (JOIN_TAB *tab= join_tab + const_tables; tab < join_tab + tables ; tab++) + */ + { - if (*tab->on_expr_ref) + List_iterator<JOIN_TAB_RANGE> it(join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + uint first_tab_offs= const_tables; + while ((jt_range= it++)) { - *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref, - tab->cond_equal, - map2table); - (*tab->on_expr_ref)->update_used_tables(); + for (JOIN_TAB *tab= jt_range->start + first_tab_offs; + tab < jt_range->end; tab++) + { + if (*tab->on_expr_ref) + { + *tab->on_expr_ref= substitute_for_best_equal_field(*tab->on_expr_ref, + tab->cond_equal, + map2table); + (*tab->on_expr_ref)->update_used_tables(); + } + } + first_tab_offs= 0; } } @@ -1039,6 +1048,7 @@ JOIN::optimize() { conds=new Item_int((longlong) 0,1); // Always false } + if (make_join_select(this, select, conds)) { zero_result_cause= @@ -1301,8 +1311,11 @@ JOIN::optimize() */ if (need_tmp || select_distinct || group_list || order) { - for (uint i = const_tables; i < tables; i++) - join_tab[i].table->prepare_for_position(); + for (uint i= 0; i < tables; i++) + { + if (!(table[i]->map & const_table_map)) + table[i]->prepare_for_position(); + } } DBUG_EXECUTE("info",TEST_join(this);); @@ -2118,7 +2131,7 @@ JOIN::exec() WHERE clause for any tables after the sorted one. */ JOIN_TAB *curr_table= &curr_join->join_tab[curr_join->const_tables+1]; - JOIN_TAB *end_table= &curr_join->join_tab[curr_join->tables]; + JOIN_TAB *end_table= &curr_join->join_tab[curr_join->top_jtrange_tables]; for (; curr_table < end_table ; curr_table++) { /* @@ -2470,7 +2483,7 @@ bool JOIN::setup_subquery_materialization() { Item_in_subselect *in_subs= (Item_in_subselect*) subquery_predicate; if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION && - in_subs->setup_engine()) + in_subs->setup_engine(FALSE)) return TRUE; } } @@ -2556,9 +2569,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, DBUG_ENTER("make_join_statistics"); table_count=join->tables; - stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*table_count); + + stat=(JOIN_TAB*) join->thd->calloc(sizeof(JOIN_TAB)*(table_count)); stat_ref=(JOIN_TAB**) join->thd->alloc(sizeof(JOIN_TAB*)*MAX_TABLES); - table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*(table_count*2)); + table_vector=(TABLE**) join->thd->alloc(sizeof(TABLE*)*((table_count)*2)); if (!stat || !stat_ref || !table_vector) DBUG_RETURN(1); // Eom /* purecov: inspected */ @@ -2567,7 +2581,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, stat_end=stat+table_count; found_const_table_map= all_table_map=0; const_count=0; - + for (s= stat, i= 0; tables; s++, tables= tables->next_leaf, i++) @@ -2590,24 +2604,27 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, table->reginfo.join_tab=s; table->reginfo.not_exists_optimize=0; bzero((char*) table->const_key_parts, sizeof(key_part_map)*table->s->keys); - all_table_map|= table->map; + all_table_map|= s->table->map; s->join=join; s->info=0; // For describe + s->bush_root_tab= NULL; s->dependent= tables->dep_tables; s->key_dependent= 0; if (tables->schema_table) table->file->stats.records= 2; table->quick_condition_rows= table->file->stats.records; - + s->on_expr_ref= &tables->on_expr; if (*s->on_expr_ref) { /* s is the only inner table of an outer join */ #ifdef WITH_PARTITION_STORAGE_ENGINE - if ((!table->file->stats.records || table->no_partitions_used) && !embedding) + if (!table->is_filled_at_execution() && + (!table->file->stats.records || table->no_partitions_used) && !embedding) #else - if (!table->file->stats.records && !embedding) + if (!table->is_filled_at_execution() && + !table->file->stats.records && !embedding) #endif { // Empty table s->dependent= 0; // Ignore LEFT JOIN depend. @@ -2641,7 +2658,8 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, #else const bool no_partitions_used= FALSE; #endif - if ((table->s->system || table->file->stats.records <= 1 || + if (!table->is_filled_at_execution() && + (table->s->system || table->file->stats.records <= 1 || no_partitions_used) && !s->dependent && (table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) && @@ -2651,6 +2669,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, no_rows_const_tables |= table->map; } } + stat_vector[i]=0; join->outer_join=outer_join; @@ -2739,6 +2758,9 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, { table=s->table; + if (table->is_filled_at_execution()) + continue; + /* If equi-join condition by a key is null rejecting and after a substitution of a const table the key value happens to be null @@ -2891,15 +2913,30 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, for (s=stat ; s < stat_end ; s++) { + s->startup_cost= 0; if (s->type == JT_SYSTEM || s->type == JT_CONST) { /* Only one matching row */ - s->found_records=s->records=s->read_time=1; s->worst_seeks=1.0; + s->found_records= s->records= 1; + s->read_time=1.0; + s->worst_seeks=1.0; continue; } /* Approximate found rows and time to read them */ - s->found_records=s->records=s->table->file->stats.records; - s->read_time=(ha_rows) s->table->file->scan_time(); + + if (s->table->is_filled_at_execution()) + { + get_delayed_table_estimates(s->table, &s->records, &s->read_time, + &s->startup_cost); + s->found_records= s->records; + table->quick_condition_rows=s->records; + } + else + { + s->found_records= s->records= s->table->file->stats.records; + s->read_time= s->table->file->scan_time(); + } + /* Set a max range of how many seeks we can expect when using keys @@ -2922,10 +2959,11 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, Don't do range analysis if we're on the inner side of an outer join (2). Do range analysis if we're on the inner side of a semi-join (3). */ - if (!s->const_keys.is_clear_all() && // (1) - (!s->table->pos_in_table_list->embedding || // (2) - (s->table->pos_in_table_list->embedding && // (3) - s->table->pos_in_table_list->embedding->sj_on_expr))) // (3) + if (!s->const_keys.is_clear_all() && // (1) + (!s->table->pos_in_table_list->embedding || // (2) + (s->table->pos_in_table_list->embedding && // (3) + s->table->pos_in_table_list->embedding->sj_on_expr)) && // (3) + !s->table->is_filled_at_execution()) { ha_rows records; SQL_SELECT *select; @@ -2963,7 +3001,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, if (records != HA_POS_ERROR) { s->found_records=records; - s->read_time= (ha_rows) (s->quick ? s->quick->read_time : 0.0); + s->read_time= s->quick ? s->quick->read_time : 0.0; } delete select; } @@ -2974,7 +3012,7 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, join->join_tab=stat; join->map2table=stat_ref; - join->table= join->all_tables=table_vector; + join->table= table_vector; join->const_tables=const_count; join->found_const_table_map=found_const_table_map; @@ -4135,7 +4173,7 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array) for (tablenr=0 ; ! (map & 1) ; map>>=1, tablenr++) ; if (map == 1) // Only one table { - TABLE *tmp_table=join->all_tables[tablenr]; + TABLE *tmp_table=join->table[tablenr]; keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100); } } @@ -4642,8 +4680,9 @@ best_access_path(JOIN *join, else tmp= best_time; // Do nothing } - loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); + tmp += s->startup_cost; + loose_scan_opt.check_ref_access_part2(key, start_key, records, tmp); } /* not ft_key */ if (tmp < best_time - records/(double) TIME_FOR_COMPARE) { @@ -4685,13 +4724,19 @@ best_access_path(JOIN *join, Since we have a 'ref' access path, and FORCE INDEX instructs us to choose it over ALL/index, there is no need to consider a full table scan. + (5) Non-flattenable semi-joins: don't consider doing a scan of temporary + table if we had an option to make lookups into it. In real-world cases, + lookups are cheaper than full scans, but when the table is small, they + can be [considered to be] more expensive, which causes lookups not to + be used for cases with small datasets, which is annoying. */ if ((records >= s->found_records || best > s->read_time) && // (1) !(s->quick && best_key && s->quick->index == best_key->key && // (2) best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&// (2) !((s->table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) ! s->table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) - !(s->table->force_index && best_key && !s->quick)) // (4) + !(s->table->force_index && best_key && !s->quick) && // (4) + !(best_key && s->table->pos_in_table_list->jtbm_subselect)) // (5) { // Check full join ha_rows rnd_records= s->found_records; /* @@ -4738,8 +4783,7 @@ best_access_path(JOIN *join, } else { - /* Estimate cost of reading table. */ - tmp= s->table->file->scan_time(); + tmp= s->read_time; if ((s->table->map & join->outer_join) || disable_jbuf) // Can't use join cache { /* @@ -4768,6 +4812,7 @@ best_access_path(JOIN *join, } } + tmp += s->startup_cost; /* We estimate the cost of evaluating WHERE clause for found records as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus @@ -4790,7 +4835,7 @@ best_access_path(JOIN *join, join->outer_join))); } } - + /* Update the cost information for the current partial plan */ pos->records_read= records; pos->read_time= best; @@ -5244,7 +5289,7 @@ greedy_search(JOIN *join, 'join->best_positions' contains a complete optimal extension of the current partial QEP. */ - DBUG_EXECUTE("opt", print_plan(join, join->tables, + DBUG_EXECUTE("opt", print_plan(join, n_tables, record_count, read_time, read_time, "optimal");); DBUG_RETURN(FALSE); @@ -5735,8 +5780,8 @@ void calc_used_field_length(THD *thd, JOIN_TAB *join_tab) rec_length+=(uint) max(4,blob_length); } /* - psergey-todo: why we don't count here rowid that we might need to store - when using DuplicateElimination? + TODO: why we don't count here rowid that we might need to store when + using DuplicateElimination? */ join_tab->used_fields=fields; join_tab->used_fieldlength=rec_length; @@ -5851,6 +5896,102 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) } +JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables) +{ + JOIN_TAB *first= join->join_tab; + if (after_const_tables) + first+= join->const_tables; + if (first < join->join_tab + join->top_jtrange_tables) + return first; + return NULL; +} + + +/* + A helper function to loop over all join's join_tab in sequential fashion + + DESCRIPTION + Depending on include_bush_roots parameter, JOIN_TABS that represent + SJM-scan/lookups are produced or omitted. + + SJM-Bush children are returned right after (or in place of) their container + join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems + to.) +*/ + +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool include_bush_roots) +{ + if (include_bush_roots && tab->bush_children) + return tab->bush_children->start; + + DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab); + if (tab->last_leaf_in_bush) + tab= tab->bush_root_tab; + + if (tab->bush_root_tab) + return ++tab; + + if (++tab == join->join_tab + join->top_jtrange_tables) + return NULL; + + if (!include_bush_roots && tab->bush_children) + tab= tab->bush_children->start; + + return tab; +} + + +/* + A helper function to iterate over all join tables in bush-children-first order + + DESCRIPTION + + For example, for this join plan + + ot1 ot2 sjm ot3 + | +--------+ + | | + it1 it2 it3 + + + the function will return + + ot1-ot2-it1-it2-it3-sjm-ot3 ... + +*/ + +JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab) +{ + bool start= FALSE; + if (tab == NULL) + { + /* This means we're starting the enumeration */ + if (join->const_tables == join->top_jtrange_tables) + return NULL; + + tab= join->join_tab + join->const_tables; + start= TRUE; + } + + if (tab->last_leaf_in_bush) + return tab->bush_root_tab; + + /* Move to next tab in the array we're traversing*/ + if (!start) + tab++; + + if (tab == join->join_tab_ranges.head()->end) + return NULL; /* End */ + + if (tab->bush_children) + return tab->bush_children->start; + + return tab; +} + + +static Item * const null_ptr= NULL; + /* Set up join struct according to the picked join order in @@ -5865,7 +6006,12 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) fix_semijoin_strategies_for_picked_join_order) - create join->join_tab array and put there the JOIN_TABs in the join order - create data structures describing ref access methods. - + + NOTE + In this function we switch from pre-join-optimization JOIN_TABs to + post-join-optimization JOIN_TABs. This is achieved by copying the entire + JOIN_TAB objects. + RETURN FALSE OK TRUE Out of memory @@ -5874,7 +6020,7 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) static bool get_best_combination(JOIN *join) { - uint i,tablenr; + uint tablenr; table_map used_tables; JOIN_TAB *join_tab,*j; KEYUSE *keyuse; @@ -5892,27 +6038,89 @@ get_best_combination(JOIN *join) used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read fix_semijoin_strategies_for_picked_join_order(join); - + + JOIN_TAB_RANGE *root_range= new JOIN_TAB_RANGE; + root_range->start= join->join_tab; + /* root_range->end will be set later */ + join->join_tab_ranges.empty(); + join->join_tab_ranges.push_back(root_range); + + JOIN_TAB *sjm_nest_end= NULL; + JOIN_TAB *sjm_saved_tab; /* protected by sjm_nest_end */ + for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) { TABLE *form; + POSITION *cur_pos= &join->best_positions[tablenr]; + if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE || + cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + { + /* + Ok, we've entered an SJ-Materialization semi-join (note that this can't + be done recursively, semi-joins are not allowed to be nested). + */ + /* + 1. Put into main join order a JOIN_TAB that represents a lookup or scan + in the temptable. + // TODO: record this join_tab to be processed by + // setup_semijoin_elimination? + */ + bzero(j, sizeof(JOIN_TAB)); + j->join= join; + j->table= NULL; //temporary way to tell SJM tables from others. + j->ref.key = -1; + j->ref.key_parts=0; + j->loosescan_match_tab= NULL; //non-nulls will be set later + j->use_join_cache= FALSE; + j->on_expr_ref= (Item**) &null_ptr; + j->cache= NULL; + j->keys= key_map(1); // The unique index is always in 'possible keys' in EXPLAIN + + + /* + 2. Proceed with processing SJM nest's join tabs, putting them into the + sub-order + */ + SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; + j->records= j->records_read= sjm->is_sj_scan? sjm->rows : 1; + JOIN_TAB *jt= (JOIN_TAB*)join->thd->alloc(sizeof(JOIN_TAB) * sjm->tables); + JOIN_TAB_RANGE *jt_range= new JOIN_TAB_RANGE; + jt_range->start= jt; + jt_range->end= jt + sjm->tables; + //sjm->jt_range= jt_range; + join->join_tab_ranges.push_back(jt_range); + j->bush_children= jt_range; + j->bush_root_tab= NULL; //note: a lot of code depends on bush nodes not containing one another + j->quick= NULL; + sjm_nest_end= jt + sjm->tables; + sjm_saved_tab= j; + root_range->end= j+1; + + j= jt; + //goto loop_end_not_table; + } + *j= *join->best_positions[tablenr].table; + + if (sjm_nest_end) + j->bush_root_tab= sjm_saved_tab; + else + root_range->end= j+1; form=join->table[tablenr]=j->table; - //psergey-merge: or is the above: form=join->all_tables[tablenr]=j->table; used_tables|= form->map; form->reginfo.join_tab=j; if (!*j->on_expr_ref) form->reginfo.not_exists_optimize=0; // Only with LEFT JOIN DBUG_PRINT("info",("type: %d", j->type)); if (j->type == JT_CONST) - continue; // Handled in make_join_stat.. + goto loop_end; // Handled in make_join_stat.. j->loosescan_match_tab= NULL; //non-nulls will be set later j->ref.key = -1; j->ref.key_parts=0; if (j->type == JT_SYSTEM) - continue; + goto loop_end; if (j->keys.is_clear_all() || !(keyuse= join->best_positions[tablenr].key) || (join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN)) { @@ -5923,10 +6131,24 @@ get_best_combination(JOIN *join) } else if (create_ref_for_key(join, j, keyuse, used_tables)) DBUG_RETURN(TRUE); // Something went wrong + loop_end: + j->records_read= join->best_positions[tablenr].records_read; + join->map2table[j->table->tablenr]= j; + + // If we've reached the end of sjm nest, switch back to main sequence + if (j + 1 == sjm_nest_end) + { + j->last_leaf_in_bush= TRUE; + j= sjm_saved_tab; + sjm_nest_end= NULL; + } } - for (i=0 ; i < table_count ; i++) - join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i; + join->top_jtrange_tables= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; + + //for (i=0 ; i < table_count ; i++) + // join->map2table[join->join_tab[i].table->tablenr]=join->join_tab+i; update_depend_map(join); DBUG_RETURN(0); } @@ -6176,6 +6398,8 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) join_tab= parent->join_tab_reexec; table= &parent->table_reexec[0]; parent->table_reexec[0]= temp_table; + top_jtrange_tables= 1; + tables= 1; const_tables= 0; const_table_map= 0; @@ -6220,6 +6444,9 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) join_tab->do_firstmatch= NULL; join_tab->loosescan_match_tab= NULL; join_tab->emb_sj_nest= NULL; + join_tab->bush_root_tab= NULL; + join_tab->bush_children= NULL; + join_tab->last_leaf_in_bush= FALSE; bzero((char*) &join_tab->read_record,sizeof(join_tab->read_record)); temp_table->status=0; temp_table->null_row=0; @@ -6244,6 +6471,7 @@ inline void add_cond_and_fix(Item **e1, Item *e2) } + /** Add to join_tab->select_cond[i] "table.field IS NOT NULL" conditions we've inferred from ref/eq_ref access performed. @@ -6298,9 +6526,11 @@ inline void add_cond_and_fix(Item **e1, Item *e2) static void add_not_null_conds(JOIN *join) { DBUG_ENTER("add_not_null_conds"); - for (uint i=join->const_tables ; i < join->tables ; i++) + + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) { - JOIN_TAB *tab=join->join_tab+i; if ((tab->type == JT_REF || tab->type == JT_EQ_REF || tab->type == JT_REF_OR_NULL) && !tab->table->maybe_null) @@ -6424,58 +6654,72 @@ static void make_outerjoin_info(JOIN *join) { DBUG_ENTER("make_outerjoin_info"); - for (uint i=join->const_tables ; i < join->tables ; i++) - { - JOIN_TAB *tab=join->join_tab+i; - TABLE *table=tab->table; - TABLE_LIST *tbl= table->pos_in_table_list; - TABLE_LIST *embedding= tbl->embedding; + bool top= TRUE; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; - if (tbl->outer_join) + while ((jt_range= it++)) + { + for (JOIN_TAB *tab=jt_range->start + (top ? join->const_tables : 0); + tab != jt_range->end; tab++) { + TABLE *table=tab->table; /* - Table tab is the only one inner table for outer join. - (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a - is in the query above.) + psergey: The following is probably incorrect, fix it when we get + semi+outer joins processing to work: */ - tab->last_inner= tab->first_inner= tab; - tab->on_expr_ref= &tbl->on_expr; - tab->cond_equal= tbl->cond_equal; - if (embedding) - tab->first_upper= embedding->nested_join->first_nested; - } - for ( ; embedding ; embedding= embedding->embedding) - { - /* Ignore sj-nests: */ - if (!embedding->on_expr) + if (!table) continue; - NESTED_JOIN *nested_join= embedding->nested_join; - if (!nested_join->counter) + TABLE_LIST *tbl= table->pos_in_table_list; + TABLE_LIST *embedding= tbl->embedding; + + if (tbl->outer_join) { /* - Table tab is the first inner table for nested_join. - Save reference to it in the nested join structure. - */ - nested_join->first_nested= tab; - tab->on_expr_ref= &embedding->on_expr; + Table tab is the only one inner table for outer join. + (Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a + is in the query above.) + */ + tab->last_inner= tab->first_inner= tab; + tab->on_expr_ref= &tbl->on_expr; tab->cond_equal= tbl->cond_equal; - if (embedding->embedding) - tab->first_upper= embedding->embedding->nested_join->first_nested; - } - if (!tab->first_inner) - tab->first_inner= nested_join->first_nested; - if (tab->table->reginfo.not_exists_optimize) - tab->first_inner->table->reginfo.not_exists_optimize= 1; - if (++nested_join->counter < nested_join->n_tables) - break; - /* Table tab is the last inner table for nested join. */ - nested_join->first_nested->last_inner= tab; - if (tab->first_inner->table->reginfo.not_exists_optimize) + if (embedding) + tab->first_upper= embedding->nested_join->first_nested; + } + for ( ; embedding ; embedding= embedding->embedding) { - for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++) - join_tab->table->reginfo.not_exists_optimize= 1; - } + /* Ignore sj-nests: */ + if (!embedding->on_expr) + continue; + NESTED_JOIN *nested_join= embedding->nested_join; + if (!nested_join->counter) + { + /* + Table tab is the first inner table for nested_join. + Save reference to it in the nested join structure. + */ + nested_join->first_nested= tab; + tab->on_expr_ref= &embedding->on_expr; + tab->cond_equal= tbl->cond_equal; + if (embedding->embedding) + tab->first_upper= embedding->embedding->nested_join->first_nested; + } + if (!tab->first_inner) + tab->first_inner= nested_join->first_nested; + if (tab->table->reginfo.not_exists_optimize) + tab->first_inner->table->reginfo.not_exists_optimize= 1; + if (++nested_join->counter < nested_join->n_tables) + break; + /* Table tab is the last inner table for nested join. */ + nested_join->first_nested->last_inner= tab; + if (tab->first_inner->table->reginfo.not_exists_optimize) + { + for (JOIN_TAB *join_tab= tab->first_inner; join_tab <= tab; join_tab++) + join_tab->table->reginfo.not_exists_optimize= 1; + } + } } + top= FALSE; } DBUG_VOID_RETURN; } @@ -6518,8 +6762,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) join->const_table_map, (table_map) 0, TRUE); DBUG_EXECUTE("where",print_where(const_cond,"constants", QT_ORDINARY);); - for (JOIN_TAB *tab= join->join_tab+join->const_tables; - tab < join->join_tab+join->tables ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, FALSE)) { if (*tab->on_expr_ref) { @@ -6558,15 +6803,21 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); JOIN_TAB *tab; table_map current_map; - for (uint i=join->const_tables ; i < join->tables ; i++) + uint i= join->const_tables; + for (tab= next_depth_first_tab(join, NULL); tab; + tab= next_depth_first_tab(join, tab), i++) { - tab= join->join_tab+i; /* first_inner is the X in queries like: SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X */ - JOIN_TAB *first_inner_tab= tab->first_inner; - current_map= tab->table->map; + JOIN_TAB *first_inner_tab= tab->first_inner; + + if (tab->table) + current_map= tab->table->map; + else + current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables; + bool use_quick_range=0; COND *tmp; @@ -6613,8 +6864,20 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } tmp= NULL; + if (cond) - tmp= make_cond_for_table(cond, used_tables, current_map, FALSE); + { + if (tab->bush_children) + { + // Reached the materialization tab + tmp= make_cond_after_sjm(cond, cond, save_used_tables, used_tables); + used_tables= save_used_tables | used_tables; + save_used_tables= 0; + } + else + tmp= make_cond_for_table(cond, used_tables, current_map, FALSE); + } + if (cond && !tmp && tab->quick) { // Outer join if (tab->type != JT_ALL) @@ -6642,7 +6905,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL || tab->type == JT_EQ_REF || first_inner_tab) { - DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY);); + DBUG_EXECUTE("where",print_where(tmp, + tab->table? tab->table->alias :"sjm-nest", + QT_ORDINARY);); SQL_SELECT *sel= tab->select= ((SQL_SELECT*) thd->memdup((uchar*) select, sizeof(*select))); @@ -6666,16 +6931,19 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tab->set_select_cond(tmp, __LINE__); /* Push condition to storage engine if this is enabled and the condition is not guarded */ - tab->table->file->pushed_cond= NULL; - if (thd->variables.engine_condition_pushdown && !first_inner_tab) + if (tab->table) { - COND *push_cond= - make_cond_for_table(tmp, current_map, current_map, FALSE); - if (push_cond) + tab->table->file->pushed_cond= NULL; + if (thd->variables.engine_condition_pushdown && !first_inner_tab) { - /* Push condition to handler */ - if (!tab->table->file->cond_push(push_cond)) - tab->table->file->pushed_cond= push_cond; + COND *push_cond= + make_cond_for_table(tmp, current_map, current_map, FALSE); + if (push_cond) + { + /* Push condition to handler */ + if (!tab->table->file->cond_push(push_cond)) + tab->table->file->pushed_cond= push_cond; + } } } } @@ -6686,7 +6954,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } sel->head=tab->table; - DBUG_EXECUTE("where",print_where(tmp,tab->table->alias, QT_ORDINARY);); + DBUG_EXECUTE("where",print_where(tmp,tab->table? tab->table->alias: "sjm-nest", QT_ORDINARY);); if (tab->quick) { /* Use quick key read if it's a constant and it's not used @@ -6705,7 +6973,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } tab->quick=0; } - uint ref_key=(uint) sel->head->reginfo.join_tab->ref.key+1; + uint ref_key= sel->head? (uint) sel->head->reginfo.join_tab->ref.key+1 : 0; if (i == join->const_tables && ref_key) { if (!tab->const_keys.is_clear_all() && @@ -6724,12 +6992,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) the index if we are using limit and this is the first table */ - if ((cond && - (!tab->keys.is_subset(tab->const_keys) && i > 0)) || - (!tab->const_keys.is_clear_all() && i == join->const_tables && - join->unit->select_limit_cnt < - join->best_positions[i].records_read && - !(join->select_options & OPTION_FOUND_ROWS))) + if (!tab->table->is_filled_at_execution() && + ((cond && (!tab->keys.is_subset(tab->const_keys) && i > 0)) || + (!tab->const_keys.is_clear_all() && i == join->const_tables && + join->unit->select_limit_cnt < + join->best_positions[i].records_read && + !(join->select_options & OPTION_FOUND_ROWS)))) { /* Join with outer join condition */ COND *orig_cond=sel->cond; @@ -6812,7 +7080,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } /* - Push down conditions from all on expressions. + Push down conditions from all ON expressions. Each of these conditions are guarded by a variable that turns if off just before null complemented row for outer joins is formed. Thus, the condition from an @@ -6821,8 +7089,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) */ /* First push down constant conditions from on expressions */ - for (JOIN_TAB *join_tab= join->join_tab+join->const_tables; - join_tab < join->join_tab+join->tables ; join_tab++) + for (JOIN_TAB *join_tab= first_linear_tab(join, TRUE); + join_tab; + join_tab= next_linear_tab(join, join_tab, FALSE)) { if (*join_tab->on_expr_ref) { @@ -6847,10 +7116,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) } } - /* Push down non-constant conditions from on expressions */ + /* Push down non-constant conditions from ON expressions */ JOIN_TAB *last_tab= tab; while (first_inner_tab && first_inner_tab->last_inner == last_tab) - { + { /* Table tab is the last inner table of an outer join. An on expression is always attached to it. @@ -6859,8 +7128,18 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) table_map used_tables2= (join->const_table_map | OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); - for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++) + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE)) { + if (!tab->table) + { + /* + psergey-todo: this is probably incorrect, fix this when we get + correct processing for outer joins + semi joins + */ + continue; + } current_map= tab->table->map; used_tables2|= current_map; COND *tmp_cond= make_cond_for_table(on_expr, used_tables2, @@ -6901,37 +7180,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) cond_tab->select_cond->quick_fix_field(); if (cond_tab->select) cond_tab->select->cond= cond_tab->select_cond; - } + } + if (tab == last_tab) + break; } first_inner_tab= first_inner_tab->first_upper; } - - if (save_used_tables && !(used_tables & - ~(tab->emb_sj_nest->sj_inner_tables | - join->const_table_map | PSEUDO_TABLE_BITS))) - { - /* - We have reached the end of semi join nest. That is, the join order - looks like this: - - outer_tbl1 SJ-Materialize(inner_tbl1 ... inner_tblN) outer_tbl ... - ^ - \-we're here - At this point, we need to produce two conditions - - A condition that can be checked when we have all of the sj-inner - tables (inner_tbl1 ... inner_tblN). This will be used while doing - materialization. - - A condition that can be checked when we have all of the tables - in the prefix (both inner and outer). - */ - tab->emb_sj_nest->sj_mat_info->join_cond= - cond ? - make_cond_after_sjm(cond, cond, save_used_tables, used_tables): - NULL; - used_tables= save_used_tables | used_tables; - save_used_tables= 0; - } - } } DBUG_RETURN(0); @@ -7120,7 +7374,7 @@ void revise_cache_usage(JOIN_TAB *join_tab) SYNOPSIS end_sj_materialize() join The join - join_tab Last join table + join_tab Points to right after the last join_tab in materialization bush end_of_records FALSE <=> This call is made to pass another record combination TRUE <=> EOF (no action) @@ -7138,7 +7392,7 @@ void revise_cache_usage(JOIN_TAB *join_tab) NESTED_LOOP_ERROR */ -static enum_nested_loop_state +enum_nested_loop_state end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) { int error; @@ -7278,15 +7532,17 @@ uint check_join_cache_usage(JOIN_TAB *tab, uint i= tab - join->join_tab; *icp_other_tables_ok= TRUE; - if (cache_level == 0 || i == join->const_tables) + /* + Don't use join cache if @@join_cache_level==0 or this table is the first + one join suborder (either at top level or inside a bush) + */ + if (cache_level == 0 || tab == join->join_tab + join->const_tables || + (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)) return 0; if (options & SELECT_NO_JOIN_CACHE) goto no_join_cache; - /* - psergey-todo: why the below when execution code seems to handle the - "range checked for each record" case? - */ + if (tab->use_quick == 2) goto no_join_cache; /* @@ -7304,8 +7560,8 @@ uint check_join_cache_usage(JOIN_TAB *tab, Don't use join buffering if we're dictated not to by no_jbuf_after (this ...) */ - if (!(i <= no_jbuf_after) || tab->loosescan_match_tab || - sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) + if ((!tab->bush_root_tab? !(i <= no_jbuf_after) : FALSE) || + tab->loosescan_match_tab || tab->bush_children) goto no_join_cache; for (JOIN_TAB *first_inner= tab->first_inner; first_inner; @@ -7413,8 +7669,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) bool statistics= test(!(join->select_options & SELECT_DESCRIBE)); bool ordered_set= 0; bool sorted= 1; - uint first_sjm_table= MAX_TABLES; - uint last_sjm_table= MAX_TABLES; + //uint first_sjm_table= MAX_TABLES; + //uint last_sjm_table= MAX_TABLES; DBUG_ENTER("make_join_readinfo"); @@ -7422,15 +7678,14 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) setup_semijoin_dups_elimination(join, options, no_jbuf_after)) DBUG_RETURN(TRUE); /* purecov: inspected */ - for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++) + i= 0; + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); + tab; + tab= next_linear_tab(join, tab, TRUE), i++) { - JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; bool icp_other_tables_ok; - tab->read_record.table= table; - tab->read_record.file=table->file; - tab->read_record.unlock_row= rr_unlock_row; - tab->next_select=sub_select; /* normal select */ /* Determine if the set is already ordered for ORDER BY, so it can @@ -7448,6 +7703,24 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) tab->sorted= sorted; sorted= 0; // only first must be sorted + + if (tab->bush_children) + { + if (setup_sj_materialization(tab)) + return TRUE; + table= tab->table; + } + + tab->read_record.table= table; + tab->read_record.file=table->file; + tab->read_record.unlock_row= rr_unlock_row; + + if (!(tab->bush_root_tab && + tab->bush_root_tab->bush_children->end == tab + 1)) + { + tab->next_select=sub_select; /* normal select */ + } + if (tab->loosescan_match_tab) { if (!(tab->loosescan_buf= (uchar*)join->thd->alloc(tab-> @@ -7455,19 +7728,6 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) return TRUE; /* purecov: inspected */ tab->sorted= TRUE; } - if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) - { - /* This is a start of semi-join nest */ - first_sjm_table= i; - last_sjm_table= i + join->best_positions[i].n_sj_tables; - if (i == join->const_tables) - join->first_select= sub_select_sjm; - else - tab[-1].next_select= sub_select_sjm; - - if (setup_sj_materialization(tab)) - return TRUE; - } table->status=STATUS_NO_RECORD; pick_table_access_method (tab); @@ -7555,7 +7815,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) } else { - tab->read_first_record= join_init_read_record; + if (!tab->bush_children) + tab->read_first_record= join_init_read_record; if (i == join->const_tables) { if (tab->select && tab->select->quick) @@ -7630,13 +7891,16 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) abort(); /* purecov: deadcode */ } } - join->join_tab[join->tables-1].next_select=0; /* Set by do_select */ + uint n_top_tables= join->join_tab_ranges.head()->end - + join->join_tab_ranges.head()->start; + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */ -/* + /* If a join buffer is used to join a table the ordering by an index for the first non-constant table cannot be employed anymore. */ - for (i=join->const_tables ; i < join->tables ; i++) + //for (i=join->const_tables ; i < join->tables ; i++) + for (i=join->const_tables ; i < n_top_tables ; i++) { JOIN_TAB *tab=join->join_tab+i; if (tab->use_join_cache) @@ -7693,6 +7957,9 @@ bool error_if_full_join(JOIN *join) /** cleanup JOIN_TAB. + + DESCRIPTION + This is invoked when we've finished all join executions. */ void JOIN_TAB::cleanup() @@ -7711,6 +7978,12 @@ void JOIN_TAB::cleanup() { table->disable_keyread(); table->file->ha_index_or_rnd_end(); + + if (table->pos_in_table_list && + table->pos_in_table_list->jtbm_subselect) + { + table->pos_in_table_list->jtbm_subselect->cleanup(); + } /* We need to reset this for next select (Tested in part_of_refkey) @@ -7839,7 +8112,7 @@ void JOIN::cleanup(bool full) if (table) { - JOIN_TAB *tab,*end; + JOIN_TAB *tab; /* Only a sorted table may be cached. This sorted table is always the first non const table in join->table @@ -7852,13 +8125,15 @@ void JOIN::cleanup(bool full) if (full) { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE)) tab->cleanup(); + //psergey4: how is the above supposed to work when + //top_jtrange_tables==FALSE? It will crash right away! table= 0; } else { - for (tab= join_tab, end= tab+tables; tab != end; tab++) + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= next_linear_tab(this, tab, TRUE)) { if (tab->table) tab->table->file->ha_index_or_rnd_end(); @@ -7998,24 +8273,29 @@ only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables) static void update_depend_map(JOIN *join) { - JOIN_TAB *join_tab=join->join_tab, *end=join_tab+join->tables; - - for (; join_tab != end ; join_tab++) - { - TABLE_REF *ref= &join_tab->ref; - table_map depend_map=0; - Item **item=ref->items; - uint i; - for (i=0 ; i < ref->key_parts ; i++,item++) - depend_map|=(*item)->used_tables(); - ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT; - depend_map&= ~OUTER_REF_TABLE_BIT; - for (JOIN_TAB **tab=join->map2table; - depend_map ; - tab++,depend_map>>=1 ) - { - if (depend_map & 1) - ref->depend_map|=(*tab)->ref.depend_map; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + + while ((jt_range= it++)) + { + for (JOIN_TAB *join_tab=jt_range->start; join_tab != jt_range->end; + join_tab++) + { + TABLE_REF *ref= &join_tab->ref; + table_map depend_map=0; + Item **item=ref->items; + uint i; + for (i=0 ; i < ref->key_parts ; i++,item++) + depend_map|=(*item)->used_tables(); + ref->depend_map=depend_map & ~OUTER_REF_TABLE_BIT; + depend_map&= ~OUTER_REF_TABLE_BIT; + for (JOIN_TAB **tab=join->map2table; + depend_map ; + tab++,depend_map>>=1 ) + { + if (depend_map & 1) + ref->depend_map|=(*tab)->ref.depend_map; + } } } } @@ -8023,7 +8303,7 @@ static void update_depend_map(JOIN *join) /** Update the dependency map for the sort order. */ -static void update_depend_map(JOIN *join, ORDER *order) +static void update_depend_map_for_order(JOIN *join, ORDER *order) { for (; order ; order=order->next) { @@ -8074,17 +8354,25 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, return change_list ? 0 : first_order; // No need to sort ORDER *order,**prev_ptr; - table_map first_table= join->join_tab[join->const_tables].table->map; + table_map first_table; table_map not_const_tables= ~join->const_table_map; table_map ref; + bool first_is_base_table= FALSE; DBUG_ENTER("remove_const"); + if (join->join_tab[join->const_tables].table) + { + first_table= join->join_tab[join->const_tables].table->map; + first_is_base_table= TRUE; + } + + prev_ptr= &first_order; *simple_order= *join->join_tab[join->const_tables].on_expr_ref ? 0 : 1; /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */ - update_depend_map(join, first_order); + update_depend_map_for_order(join, first_order); for (order=first_order; order ; order=order->next) { table_map order_tables=order->item[0]->used_tables(); @@ -8121,7 +8409,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, DBUG_PRINT("info",("removing: %s", order->item[0]->full_name())); continue; } - if ((ref=order_tables & (not_const_tables ^ first_table))) + if (first_is_base_table && (ref=order_tables & (not_const_tables ^ first_table))) { if (!(order_tables & first_table) && only_eq_ref_tables(join,first_order, ref)) @@ -8197,8 +8485,11 @@ static void clear_tables(JOIN *join) must clear only the non-const tables, as const tables are not re-calculated. */ - for (uint i=join->const_tables ; i < join->tables ; i++) - mark_as_null_row(join->table[i]); // All fields are NULL + for (uint i= 0 ; i < join->tables ; i++) + { + if (!(join->table[i]->map & join->const_table_map)) + mark_as_null_row(join->table[i]); // All fields are NULL + } } /***************************************************************************** @@ -9018,7 +9309,25 @@ static int compare_fields_by_table_order(Item_field *field1, if (outer_ref) return cmp; JOIN_TAB **idx= (JOIN_TAB **) table_join_idx; - cmp= idx[field2->field->table->tablenr]-idx[field1->field->table->tablenr]; + + JOIN_TAB *tab1= idx[field1->field->table->tablenr]; + JOIN_TAB *tab2= idx[field2->field->table->tablenr]; + + /* + if one of the table is inside a merged SJM nest and another one isn't, + compare SJM bush roots of the tables. + */ + if (tab1->bush_root_tab != tab2->bush_root_tab) + { + if (tab1->bush_root_tab) + tab1= tab1->bush_root_tab; + + if (tab2->bush_root_tab) + tab2= tab2->bush_root_tab; + } + + cmp= tab2 - tab1; + return cmp < 0 ? -1 : (cmp ? 1 : 0); } @@ -9106,7 +9415,8 @@ Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, /* Pick the "head" item: the constant one or the first in the join order - that's not inside some SJM nest. + that's not inside some SJM nest. psergey2: out-of-date comment. It is ok + inside SJM, too. */ if (item_const) head= item_const; @@ -9907,16 +10217,10 @@ static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list) if ((nested_join= table->nested_join)) { nested_join->counter= 0; - //nested_join->n_tables= my_count_bits(nested_join->used_tables & - // ~join->eliminated_tables); nested_join->n_tables= reset_nj_counters(join, &nested_join->join_list); if (!nested_join->n_tables) is_eliminated_nest= TRUE; } - //if (!table->table || (table->table->map & ~join->eliminated_tables)) - //psergey-merge10^ - //if (!table->table && (table->table->map & ~join->eliminated_tables)) - //psergey-merge11^ if ((!table->table && !is_eliminated_nest) || (table->table && (table->table->map & ~join->eliminated_tables))) n++; @@ -10148,6 +10452,9 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, for (i= first_tab; i <= last_tab; i++) reopt_remaining_tables |= join->positions[i].table->table->map; + table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; + join->cur_sj_inner_tables= 0; + for (i= first_tab; i <= last_tab; i++) { JOIN_TAB *rs= join->positions[i].table; @@ -10173,6 +10480,7 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, if (rs->emb_sj_nest) inner_fanout *= pos.records_read; } + join->cur_sj_inner_tables= save_cur_sj_inner_tables; *reopt_rec_count= rec_count; *reopt_cost= cost; @@ -11181,7 +11489,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, for distinct, as we want the distinct index to be usable in this case too. */ - item->marker == 4 || param->bit_fields_as_long, // psergey-feb17 + item->marker == 4 || param->bit_fields_as_long, force_copy_fields, param->convert_blob_length); @@ -11455,6 +11763,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, share->keys=1; share->uniques= test(using_unique_constraint); table->key_info= table->s->key_info= keyinfo; + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; @@ -11470,6 +11780,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, bool maybe_null=(*cur_group->item)->maybe_null; key_part_info->null_bit=0; key_part_info->field= field; + if (cur_group == group) + field->key_start.set_bit(0); key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); @@ -11527,7 +11839,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, indexes on blobs with arbitrary length. Such indexes cannot be used for lookups. */ - //// psergey-merge: using_unique_constraint=1; share->uniques= 1; } null_pack_length-=hidden_null_pack_length; @@ -11540,6 +11851,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, keyinfo->key_parts * sizeof(KEY_PART_INFO)))) goto err; bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); + table->keys_in_use_for_query.set_bit(0); + share->keys_in_use.set_bit(0); table->key_info= table->s->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; @@ -11578,6 +11891,13 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, { key_part_info->null_bit=0; key_part_info->field= *reg_field; + (*reg_field)->flags |= PART_KEY_FLAG; + if (key_part_info == keyinfo->key_part) + (*reg_field)->key_start.set_bit(0); + key_part_info->null_bit= (*reg_field)->null_bit; + key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - + (uchar*) table->record[0]); + key_part_info->offset= (*reg_field)->offset(table->record[0]); key_part_info->length= (uint16) (*reg_field)->pack_length(); /* TODO: @@ -12390,8 +12710,7 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) Next_select_func end_select= setup_end_select_func(join); if (join->tables) { - join->join_tab[join->tables-1].next_select= end_select; - + join->join_tab[join->top_jtrange_tables - 1].next_select= end_select; join_tab=join->join_tab+join->const_tables; } join->send_records=0; @@ -12426,9 +12745,9 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure) else { DBUG_ASSERT(join->tables); - error= join->first_select(join,join_tab,0); + error= sub_select(join,join_tab,0); if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS) - error= join->first_select(join,join_tab,1); + error= sub_select(join,join_tab,1); if (error == NESTED_LOOP_QUERY_LIMIT) error= NESTED_LOOP_OK; /* select_limit used */ } @@ -12495,133 +12814,6 @@ int rr_sequential_and_unpack(READ_RECORD *info) /* - Semi-join materialization join function - - SYNOPSIS - sub_select_sjm() - join The join - join_tab The first table in the materialization nest - end_of_records FALSE <=> This call is made to pass another record - combination - TRUE <=> EOF - - DESCRIPTION - This is a join execution function that does materialization of a join - suborder before joining it to the rest of the join. - - The table pointed by join_tab is the first of the materialized tables. - This function first creates the materialized table and then switches to - joining the materialized table with the rest of the join. - - The materialized table can be accessed in two ways: - - index lookups - - full table scan - - RETURN - One of enum_nested_loop_state values -*/ - -enum_nested_loop_state -sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) -{ - int res; - enum_nested_loop_state rc; - - DBUG_ENTER("sub_select_sjm"); - - if (!join_tab->emb_sj_nest) - { - /* - We're handling GROUP BY/ORDER BY, this is the first table, and we've - actually executed the join already and now we're just reading the - result of the join from the temporary table. - Bypass to regular join handling. - Yes, it would be nicer if sub_select_sjm wasn't called at all in this - case but there's no easy way to arrange this. - */ - rc= sub_select(join, join_tab, end_of_records); - DBUG_RETURN(rc); - } - - SJ_MATERIALIZATION_INFO *sjm= join_tab->emb_sj_nest->sj_mat_info; - if (end_of_records) - { - rc= (*join_tab[sjm->tables - 1].next_select)(join, - join_tab + sjm->tables, - end_of_records); - DBUG_RETURN(rc); - } - if (!sjm->materialized) - { - /* - Do the materialization. First, put end_sj_materialize after the last - inner table so we can catch record combinations of sj-inner tables. - */ - Next_select_func next_func= join_tab[sjm->tables - 1].next_select; - join_tab[sjm->tables - 1].next_select= end_sj_materialize; - - /* - Now run the join for the inner tables. The first call is to run the - join, the second one is to signal EOF (this is essential for some - join strategies, e.g. it will make join buffering flush the records) - */ - if ((rc= sub_select(join, join_tab, FALSE)) < 0 || - (rc= sub_select(join, join_tab, TRUE/*EOF*/)) < 0) - { - join_tab[sjm->tables - 1].next_select= next_func; - DBUG_RETURN(rc); /* it's NESTED_LOOP_(ERROR|KILLED)*/ - } - join_tab[sjm->tables - 1].next_select= next_func; - - /* - Ok, materialization finished. Initialize the access to the temptable - */ - sjm->materialized= TRUE; - join_tab->read_record.read_record= join_no_more_records; - if (sjm->is_sj_scan) - { - /* Initialize full scan */ - JOIN_TAB *last_tab= join_tab + (sjm->tables - 1); - init_read_record(&last_tab->read_record, join->thd, - sjm->table, NULL, TRUE, TRUE, FALSE); - - DBUG_ASSERT(last_tab->read_record.read_record == rr_sequential); - last_tab->read_first_record= join_read_record_no_init; - last_tab->read_record.copy_field= sjm->copy_field; - last_tab->read_record.copy_field_end= sjm->copy_field + - sjm->sjm_table_cols.elements; - last_tab->read_record.read_record= rr_sequential_and_unpack; - } - } - - if (sjm->is_sj_scan) - { - /* Do full scan of the materialized table */ - JOIN_TAB *last_tab= join_tab + (sjm->tables - 1); - - Item *save_cond= last_tab->select_cond; - last_tab->set_select_cond(sjm->join_cond, __LINE__); - - rc= sub_select(join, last_tab, end_of_records); - last_tab->set_select_cond(save_cond, __LINE__); - DBUG_RETURN(rc); - } - else - { - /* Do index lookup in the materialized table */ - if ((res= join_read_key2(join_tab, sjm->table, sjm->tab_ref)) == 1) - DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ - if (res || !sjm->in_equality->val_int()) - DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS); - } - rc= (*join_tab[sjm->tables - 1].next_select)(join, - join_tab + sjm->tables, - end_of_records); - DBUG_RETURN(rc); -} - - -/* Fill the join buffer with partial records, retrieve all full matches for them SYNOPSIS @@ -12871,6 +13063,9 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) } join->thd->row_count= 0; + if ((rc= join_tab_execution_startup(join_tab)) < 0) + DBUG_RETURN(rc); + if (join_tab->loosescan_match_tab) join_tab->loosescan_match_tab->found_match= FALSE; @@ -13665,13 +13860,24 @@ int join_init_read_record(JOIN_TAB *tab) return (*tab->read_record.read_record)(&tab->read_record); } -static int +int join_read_record_no_init(JOIN_TAB *tab) { + Copy_field *save_copy, *save_copy_end; + + save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + + init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select,1,1, FALSE); + + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + tab->read_record.read_record= rr_sequential_and_unpack; + return (*tab->read_record.read_record)(&tab->read_record); } - static int join_read_first(JOIN_TAB *tab) { @@ -14308,8 +14514,25 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), *****************************************************************************/ /** - @return - 1 if right_item is used removable reference key on left_item + Check if a given equality is guaranteed to be true by use of ref access + + SYNOPSIS + test_if_ref() + root_cond + left_item + right_item + + DESCRIPTION + Check if the given "left_item = right_item" equality is guaranteed to be + true by use of [eq_]ref access method. + + We need root_cond as we can't remove ON expressions even if employed ref + access guarantees that they are true. This is because TODO + + RETURN + TRUE if right_item is used removable reference key on left_item + FALSE Otherwise + */ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) @@ -14367,7 +14590,8 @@ bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) SYNOPSIS make_cond_for_table() cond Condition to analyze - tables Tables for which "current field values" are available + tables Tables for which "current field values" are available (this + includes used_table) used_table Table that we're extracting the condition for (may also include PSEUDO_TABLE_BITS exclude_expensive_cond Do not push expensive conditions @@ -14401,7 +14625,8 @@ make_cond_for_table(Item *cond, table_map tables, table_map used_table, return make_cond_for_table_from_pred(cond, cond, tables, used_table, exclude_expensive_cond); } - + + static Item * make_cond_for_table_from_pred(Item *root_cond, Item *cond, table_map tables, table_map used_table, @@ -14422,6 +14647,7 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, */ !((used_table & 1) && cond->is_expensive())) return (COND*) 0; // Already checked + if (cond->type() == Item::COND_ITEM) { if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) @@ -14496,6 +14722,7 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, */ (!used_table && exclude_expensive_cond && cond->is_expensive())) return (COND*) 0; // Can't check this yet + if (cond->marker == 2 || cond->eq_cmp_result() == Item::COND_OK) return cond; // Not boolean op @@ -14522,14 +14749,29 @@ make_cond_for_table_from_pred(Item *root_cond, Item *cond, } +/* + The difference of this from make_cond_for_table() is that we're in the + following state: + 1. conditions referring to 'tables' have been checked + 2. conditions referring to sjm_tables have been checked, too + 3. We need condition that couldn't be checked in #1 or #2 but + can be checked when we get both (tables | sjm_tables). + +*/ static COND * make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables) { + /* + We assume that conditions that refer to only join prefix tables or + sjm_tables have already been checked. + */ if ((!(cond->used_tables() & ~tables) || !(cond->used_tables() & ~sjm_tables))) return (COND*) 0; // Already checked + + /* AND/OR recursive descent */ if (cond->type() == Item::COND_ITEM) { if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) @@ -15084,8 +15326,6 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, &usable_keys)) < MAX_KEY) { /* Found key that can be used to retrieve data in sorted order */ - //psergey-mrr:if (tab->pre_idx_push_select_cond) - // tab->select_cond= tab->select->cond= tab->pre_idx_push_select_cond; if (tab->ref.key >= 0) { /* @@ -18017,16 +18257,21 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, { table_map used_tables=0; - uchar sjm_nests[MAX_TABLES]; - uint sjm_nests_cur=0; - uint sjm_nests_end= 0; - uint end_table= join->tables; bool printing_materialize_nest= FALSE; uint select_id= join->select_lex->select_number; - for (uint i=0 ; i < end_table ; i++) + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; + while ((jt_range= it++)) + { + if (jt_range != join->join_tab_ranges.head()) + { + select_id= jt_range->start->emb_sj_nest->sj_subq_pred->get_identifier(); + printing_materialize_nest= TRUE; + } + + for (JOIN_TAB *tab= jt_range->start + 0; tab < jt_range->end; tab++) { - JOIN_TAB *tab=join->join_tab+i; TABLE *table=tab->table; TABLE_LIST *table_list= tab->table->pos_in_table_list; char buff[512]; @@ -18059,84 +18304,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, join->select_lex->type; item_list.push_back(new Item_string(stype, strlen(stype), cs)); - /* - Special processing for SJ-Materialization nests: print the fake table - and delay printing of the SJM nest contents until later. - */ - uint sj_strategy= join->best_positions[i].sj_strategy; - if (sj_is_materialize_strategy(sj_strategy) && - !printing_materialize_nest) - { - /* table */ - int len= my_snprintf(table_name_buffer, - sizeof(table_name_buffer)-1, - "subselect%d", - tab->emb_sj_nest->sj_subq_pred->get_identifier()); - item_list.push_back(new Item_string(table_name_buffer, len, cs)); - /* partitions */ - if (join->thd->lex->describe & DESCRIBE_PARTITIONS) - item_list.push_back(item_null); - /* type */ - uint type= (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? JT_ALL : JT_EQ_REF; - item_list.push_back(new Item_string(join_type_str[type], - strlen(join_type_str[type]), - cs)); - /* possible_keys */ - item_list.push_back(new Item_string("unique_key", - strlen("unique_key"), cs)); - if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN) - { - item_list.push_back(item_null); /* key */ - item_list.push_back(item_null); /* key_len */ - item_list.push_back(item_null); /* ref */ - } - else - { - /* key */ - item_list.push_back(new Item_string("unique_key", strlen("unique_key"), cs)); - /* key_len */ - uint klen= tab->emb_sj_nest->sj_mat_info->table->key_info[0].key_length; - uint buflen= longlong2str(klen, keylen_str_buf, 10) - keylen_str_buf; - item_list.push_back(new Item_string(keylen_str_buf, buflen, cs)); - /* ref */ - item_list.push_back(new Item_string("func", strlen("func"), cs)); - } - /* rows */ - ha_rows rows= (sj_strategy == SJ_OPT_MATERIALIZE_SCAN)? - tab->emb_sj_nest->sj_mat_info->rows : 1; - item_list.push_back(new Item_int((longlong)rows, - MY_INT64_NUM_DECIMAL_DIGITS)); - /* filtered */ - if (join->thd->lex->describe & DESCRIBE_EXTENDED) - item_list.push_back(new Item_float(1.0, 2)); - - /* Extra */ - if (need_tmp_table) - { - need_tmp_table=0; - extra.append(STRING_WITH_LEN("; Using temporary")); - } - if (need_order) - { - need_order=0; - extra.append(STRING_WITH_LEN("; Using filesort")); - } - /* Skip initial "; "*/ - const char *str= extra.ptr(); - uint32 extra_len= extra.length(); - if (extra_len) - { - str += 2; - extra_len -= 2; - } - item_list.push_back(new Item_string(str, extra_len, cs)); - - /* Register the nest for further processing: */ - sjm_nests[sjm_nests_end++]= i; - i += join->best_positions[i].n_sj_tables-1; - goto loop_end; - } - if (tab->type == JT_ALL && tab->select && tab->select->quick) { quick_type= tab->select->quick->get_type(); @@ -18157,6 +18324,16 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, table->derived_select_number); item_list.push_back(new Item_string(table_name_buffer, len, cs)); } + else if (tab->bush_children) + { + JOIN_TAB *ctab= tab->bush_children->start; + /* table */ + int len= my_snprintf(table_name_buffer, + sizeof(table_name_buffer)-1, + "<subquery%d>", + ctab->emb_sj_nest->sj_subq_pred->get_identifier()); + item_list.push_back(new Item_string(table_name_buffer, len, cs)); + } else { TABLE_LIST *real_table= table->pos_in_table_list; @@ -18251,7 +18428,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } else { - if (table_list->schema_table && + if (table_list && /* SJM bushes don't have table_list */ + table_list->schema_table && table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) { const char *tmp_buff; @@ -18282,7 +18460,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } /* Add "rows" field to item_list. */ - if (table_list->schema_table) + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table) { /* in_rows */ if (join->thd->lex->describe & DESCRIBE_EXTENDED) @@ -18296,9 +18475,9 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, if (tab->select && tab->select->quick) examined_rows= tab->select->quick->records; else if (tab->type == JT_NEXT || tab->type == JT_ALL) - examined_rows= tab->limit ? tab->limit : tab->table->file->records(); + examined_rows= tab->limit ? tab->limit : tab->records; else - examined_rows=(ha_rows)join->best_positions[i].records_read; + examined_rows=(ha_rows)tab->records_read; item_list.push_back(new Item_int((longlong) (ulonglong) examined_rows, MY_INT64_NUM_DECIMAL_DIGITS)); @@ -18319,7 +18498,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, */ float f= 0.0; if (examined_rows) - f= (float) (100.0 * join->best_positions[i].records_read / + f= (float) (100.0 * tab->records_read/*join->best_positions[i].records_read*/ / examined_rows); item_list.push_back(new Item_float(f, 2)); } @@ -18404,7 +18583,8 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, extra.append(STRING_WITH_LEN("; Using where")); } } - if (table_list->schema_table && + if (table_list /* SJM bushes don't have table_list */ && + table_list->schema_table && table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE) { if (!table_list->table_open_method) @@ -18483,25 +18663,6 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } } - /* - if (sj_is_materialize_strategy(sj_strategy)) - { - if (join->best_positions[i].n_sj_tables == 1) - extra.append(STRING_WITH_LEN("; Materialize")); - else - { - last_sjm_table= i + join->best_positions[i].n_sj_tables - 1; - extra.append(STRING_WITH_LEN("; Start materialize")); - } - if (sj_strategy == SJ_OPT_MATERIALIZE_SCAN) - extra.append(STRING_WITH_LEN("; Scan")); - } - else if (last_sjm_table == i) - { - extra.append(STRING_WITH_LEN("; End materialize")); - } - */ - for (uint part= 0; part < tab->ref.key_parts; part++) { if (tab->ref.cond_guards[part]) @@ -18511,7 +18672,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } } - if (i > 0 && tab[-1].next_select == sub_select_cache) + if ((tab != jt_range->start) && tab[-1].next_select == sub_select_cache) extra.append(STRING_WITH_LEN("; Using join buffer")); /* Skip initial "; "*/ @@ -18524,20 +18685,13 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, } item_list.push_back(new Item_string(str, len, cs)); } - loop_end: - if (i+1 == end_table && sjm_nests_cur != sjm_nests_end) - { - printing_materialize_nest= TRUE; - i= sjm_nests[sjm_nests_cur++] - 1; - end_table= (i+1) + join->best_positions[i+1].n_sj_tables; - select_id= join->join_tab[i+1].emb_sj_nest->sj_subq_pred->get_identifier(); - } // For next iteration used_tables|=table->map; if (result->send_data(item_list)) join->error= 1; } + } } for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit(); unit; @@ -18785,6 +18939,14 @@ void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type); str->append(')'); } + else if (jtbm_subselect) + { + str->append(STRING_WITH_LEN(" <materialize> (")); + subselect_hash_sj_engine *hash_engine; + hash_engine= (subselect_hash_sj_engine*)jtbm_subselect->engine; + hash_engine->materialize_engine->print(str, query_type); + str->append(')'); + } else { const char *cmp_name; // Name to compare with alias diff --git a/sql/sql_select.h b/sql/sql_select.h index b75813fcbcb..083220d6d6c 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -142,13 +142,25 @@ enum enum_nested_loop_state typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); + +/* + Function prototype for reading first record for a join tab + + RETURN + 0 - OK + -1 - Record not found + Other - Error +*/ typedef int (*Read_record_func)(struct st_join_table *tab); + Next_select_func setup_end_select_func(JOIN *join); int rr_sequential(READ_RECORD *info); +int rr_sequential_and_unpack(READ_RECORD *info); class JOIN_CACHE; class SJ_TMP_TABLE; +class JOIN_TAB_RANGE; typedef struct st_join_table { st_join_table() {} /* Remove gcc warning */ @@ -173,6 +185,21 @@ typedef struct st_join_table { st_join_table *last_inner; /**< last table table for embedding outer join */ st_join_table *first_upper; /**< first inner table for embedding outer join */ st_join_table *first_unmatched; /**< used for optimization purposes only */ + + /* + For join tabs that are inside an SJM bush: root of the bush + */ + st_join_table *bush_root_tab; + + /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ + bool last_leaf_in_bush; + + /* + ptr - this is a bush, and ptr points to description of child join_tab + range + NULL - this join tab has no bush children + */ + JOIN_TAB_RANGE *bush_children; /* Special content for EXPLAIN 'Extra' column or NULL if none */ const char *info; @@ -210,8 +237,13 @@ typedef struct st_join_table { method (but not 'index' for some reason), i.e. this matches method which E(#records) is in found_records. */ - ha_rows read_time; + double read_time; + + ha_rows records_read; + /* Startup cost for execution */ + double startup_cost; + table_map dependent,key_dependent; uint use_quick,index; uint status; ///< Save status for cache @@ -289,7 +321,7 @@ typedef struct st_join_table { /* Semi-join strategy to be used for this join table. This is a copy of POSITION::sj_strategy field. This field is set up by the - fix_semijion_strategies_for_picked_join_order. + fix_semijoin_strategies_for_picked_join_order. */ uint sj_strategy; @@ -475,12 +507,14 @@ protected: context can be accessed. */ JOIN *join; - - /* - Cardinality of the range of join tables whose fields can be put into the - cache. (A table from the range not necessarily contributes to the cache.) + /* + JOIN_TAB of the first table that can have it's fields in the join cache. + That is, tables in the [start_tab, tab) range can have their fields in the + join cache. + If a join tab in the range represents an SJM-nest, then all tables from the + nest can have their fields in the join cache, too. */ - uint tables; + JOIN_TAB *start_tab; /* The total number of flag and data fields that can appear in a record @@ -1213,9 +1247,6 @@ enum_nested_loop_state sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool end_of_records); -enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, - bool end_of_records); - enum_nested_loop_state end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), bool end_of_records); @@ -1362,16 +1393,31 @@ inline bool sj_is_materialize_strategy(uint strategy) } +class JOIN_TAB_RANGE: public Sql_alloc +{ +public: + JOIN_TAB *start; + JOIN_TAB *end; +}; + + class JOIN :public Sql_alloc { JOIN(const JOIN &rhs); /**< not implemented */ JOIN& operator=(const JOIN &rhs); /**< not implemented */ public: - JOIN_TAB *join_tab,**best_ref; + JOIN_TAB *join_tab, **best_ref; JOIN_TAB **map2table; ///< mapping between table indexes and JOIN_TABs JOIN_TAB *join_tab_save; ///< saved join_tab for subquery reexecution + + List<JOIN_TAB_RANGE> join_tab_ranges; + + /* + Base tables participating in the join. After join optimization is done, the + tables are stored in the join order (but the only really important part is + that const tables are first). + */ TABLE **table; - TABLE **all_tables; /** The table which has an index that allows to produce the requried ordering. A special value of 0x1 means that the ordering will be produced by @@ -1381,6 +1427,13 @@ public: uint tables; /**< Number of tables in the join */ uint outer_tables; /**< Number of tables that are not inside semijoin */ uint const_tables; + /* + Number of tables in the top join_tab array. Normally this matches + (join_tab_ranges.head()->end - join_tab_ranges.head()->start). + + We keep it here so that it is saved/restored with JOIN::restore_tmp. + */ + uint top_jtrange_tables; uint send_group_parts; bool group; /**< If query contains GROUP BY clause */ /** @@ -1456,7 +1509,6 @@ public: /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ - Next_select_func first_select; /* The cost of best complete join plan found so far during optimization, after optimization phase - cost of picked join order (not taking into @@ -1569,8 +1621,12 @@ public: bool union_part; ///< this subselect is part of union bool optimized; ///< flag to avoid double optimization in EXPLAIN + /* + Subqueries that will need to be converted to semi-join nests, including + those converted to jtbm nests. The list is emptied when conversion is done. + */ Array<Item_in_subselect> sj_subselects; - + /* Temporary tables used to weed-out semi-join duplicates */ List<TABLE> sj_tmp_tables; /* SJM nests that are executed with SJ-Materialization strategy */ @@ -1604,6 +1660,7 @@ public: join_tab= join_tab_save= 0; table= 0; tables= 0; + top_jtrange_tables= 0; const_tables= 0; eliminated_tables= 0; join_list= 0; @@ -1656,7 +1713,6 @@ public: rollup.state= ROLLUP::STATE_NONE; no_const_tables= FALSE; - first_select= sub_select; } int prepare(Item ***rref_pointer_array, TABLE_LIST *tables, uint wind_num, @@ -1943,6 +1999,7 @@ COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value); int test_if_item_cache_changed(List<Cached_item> &list); void calc_used_field_length(THD *thd, JOIN_TAB *join_tab); int join_init_read_record(JOIN_TAB *tab); +int join_read_record_no_init(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline Item * and_items(Item* cond, Item *item) { diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 0d4a015f708..4ae5e1dadb3 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -165,58 +165,66 @@ void TEST_filesort(SORT_FIELD *sortorder,uint s_length) void TEST_join(JOIN *join) { - uint i,ref; + uint ref; + int i; + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); + JOIN_TAB_RANGE *jt_range; DBUG_ENTER("TEST_join"); - /* - Assemble results of all the calls to full_name() first, - in order not to garble the tabular output below. - */ - String ref_key_parts[MAX_TABLES]; - for (i= 0; i < join->tables; i++) - { - JOIN_TAB *tab= join->join_tab + i; - for (ref= 0; ref < tab->ref.key_parts; ref++) - { - ref_key_parts[i].append(tab->ref.items[ref]->full_name()); - ref_key_parts[i].append(" "); - } - } - DBUG_LOCK_FILE; VOID(fputs("\nInfo about JOIN\n",DBUG_FILE)); - for (i=0 ; i < join->tables ; i++) + + while ((jt_range= it++)) { - JOIN_TAB *tab=join->join_tab+i; - TABLE *form=tab->table; - char key_map_buff[128]; - fprintf(DBUG_FILE,"%-16.16s type: %-7s q_keys: %s refs: %d key: %d len: %d\n", - form->alias, - join_type_str[tab->type], - tab->keys.print(key_map_buff), - tab->ref.key_parts, - tab->ref.key, - tab->ref.key_length); - if (tab->select) + /* + Assemble results of all the calls to full_name() first, + in order not to garble the tabular output below. + */ + String ref_key_parts[MAX_TABLES]; + for (i= 0; i < (jt_range->end - jt_range->start); i++) { - char buf[MAX_KEY/8+1]; - if (tab->use_quick == 2) - fprintf(DBUG_FILE, - " quick select checked for each record (keys: %s)\n", - tab->select->quick_keys.print(buf)); - else if (tab->select->quick) + JOIN_TAB *tab= jt_range->start + i; + for (ref= 0; ref < tab->ref.key_parts; ref++) { - fprintf(DBUG_FILE, " quick select used:\n"); - tab->select->quick->dbug_dump(18, FALSE); + ref_key_parts[i].append(tab->ref.items[ref]->full_name()); + ref_key_parts[i].append(" "); } - else - VOID(fputs(" select used\n",DBUG_FILE)); } - if (tab->ref.key_parts) + + for (i= 0; i < (jt_range->end - jt_range->start); i++) { - fprintf(DBUG_FILE, - " refs: %s\n", ref_key_parts[i].ptr()); + JOIN_TAB *tab= jt_range->start + i; + TABLE *form=tab->table; + char key_map_buff[128]; + fprintf(DBUG_FILE,"%-16.16s type: %-7s q_keys: %s refs: %d key: %d len: %d\n", + form->alias, + join_type_str[tab->type], + tab->keys.print(key_map_buff), + tab->ref.key_parts, + tab->ref.key, + tab->ref.key_length); + if (tab->select) + { + char buf[MAX_KEY/8+1]; + if (tab->use_quick == 2) + fprintf(DBUG_FILE, + " quick select checked for each record (keys: %s)\n", + tab->select->quick_keys.print(buf)); + else if (tab->select->quick) + { + fprintf(DBUG_FILE, " quick select used:\n"); + tab->select->quick->dbug_dump(18, FALSE); + } + else + VOID(fputs(" select used\n",DBUG_FILE)); + } + if (tab->ref.key_parts) + { + fprintf(DBUG_FILE, + " refs: %s\n", ref_key_parts[i].ptr()); + } } + VOID(fputs("\n",DBUG_FILE)); } DBUG_UNLOCK_FILE; DBUG_VOID_RETURN; diff --git a/sql/sql_union.cc b/sql/sql_union.cc index ee9ff833726..bb97c1f6352 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -136,6 +136,22 @@ select_union::create_result_table(THD *thd_arg, List<Item> *column_types, } +/** + Reset and empty the temporary table that stores the materialized query result. + + @note The cleanup performed here is exactly the same as for the two temp + tables of JOIN - exec_tmp_table_[1 | 2]. +*/ + +void select_union::cleanup() +{ + table->file->extra(HA_EXTRA_RESET_STATE); + table->file->ha_delete_all_rows(); + free_io_cache(table); + filesort_free_buffers(table,0); +} + + /* initialization procedures before fake_select_lex preparation() @@ -687,6 +703,7 @@ bool st_select_lex_unit::cleanup() { join->tables_list= 0; join->tables= 0; + join->top_jtrange_tables= 0; } error|= fake_select_lex->cleanup(); /* diff --git a/sql/table.cc b/sql/table.cc index 177dfc6fec1..b91073b51f9 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -5148,6 +5148,12 @@ bool st_table::is_children_attached(void) (parent && parent->children_attached)); } + +bool st_table::is_filled_at_execution() +{ + return test(pos_in_table_list->jtbm_subselect); +} + /* Cleanup this table for re-execution. diff --git a/sql/table.h b/sql/table.h index be15c2059a7..6aab237a400 100644 --- a/sql/table.h +++ b/sql/table.h @@ -925,7 +925,13 @@ struct st_table { key_read= 1; file->extra(HA_EXTRA_KEYREAD); DBUG_VOID_RETURN; - } + } + /* + If TRUE, the table is filled at execution phase (and so, the optimizer + should not do things like range analysis or constant table detection on + it). + */ + bool is_filled_at_execution(); inline void disable_keyread() { DBUG_ENTER("disable_keyread"); @@ -1154,7 +1160,7 @@ class Item_in_subselect; 1) table (TABLE_LIST::view == NULL) - base table (TABLE_LIST::derived == NULL) - - subquery - TABLE_LIST::table is a temp table + - FROM-clause subquery - TABLE_LIST::table is a temp table (TABLE_LIST::derived != NULL) - information schema table (TABLE_LIST::schema_table != NULL) @@ -1173,6 +1179,8 @@ class Item_in_subselect; (TABLE_LIST::natural_join != NULL) - JOIN ... USING (TABLE_LIST::join_using_fields != NULL) + - semi-join nest (sj_on_expr!= NULL && sj_subq_pred!=NULL) + 4) jtbm semi-join (jtbm_subselect != NULL) */ class Index_hint; @@ -1215,8 +1223,14 @@ struct TABLE_LIST */ table_map sj_inner_tables; /* Number of IN-compared expressions */ - uint sj_in_exprs; + uint sj_in_exprs; + + /* If this is a non-jtbm semi-join nest: corresponding subselect predicate */ Item_in_subselect *sj_subq_pred; + + /* If this is a jtbm semi-join object: corresponding subselect predicate */ + Item_in_subselect *jtbm_subselect; + SJ_MATERIALIZATION_INFO *sj_mat_info; /* |