diff options
Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r-- | sql/sql_select.cc | 769 |
1 files changed, 487 insertions, 282 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 0532c6c000c..9a1dfd83508 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -47,6 +47,7 @@ // print_sjm, print_plan, TEST_join #include "records.h" // init_read_record, end_read_record #include "filesort.h" // filesort_free_buffers +#include "filesort_utils.h" // get_qsort_sort_cost #include "sql_union.h" // mysql_union #include "opt_subselect.h" #include "sql_derived.h" @@ -68,6 +69,7 @@ #include "my_json_writer.h" #include "opt_trace.h" #include "create_tmp_table.h" +#include "optimizer_defaults.h" /* A key part number that means we're using a fulltext scan. @@ -99,14 +101,7 @@ #define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) < 1.0000001) -#define double_to_rows(A) ((A) >= ((double)HA_POS_ERROR) ? HA_POS_ERROR : (ha_rows) (A)) - -/* Cost for reading a row through an index */ -struct INDEX_READ_COST -{ - double read_cost; - double index_only_cost; -}; +#define double_to_rows(A) ((A) >= ((double)HA_ROWS_MAX) ? HA_ROWS_MAX : (ha_rows) (A)) const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", "MAYBE_REF","ALL","range","index","fulltext", @@ -257,7 +252,6 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, bool is_top_and_level); static Item* part_of_refkey(TABLE *form,Field *field); -uint find_shortest_key(TABLE *table, const key_map *usable_keys); static bool test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int key, @@ -331,7 +325,8 @@ static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *, List<Item> &, List<Item> &, bool, bool, bool); static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, - table_map rem_tables); + table_map rem_tables, + double *records_out); void set_postjoin_aggr_write_func(JOIN_TAB *tab); static Item **get_sargable_cond(JOIN *join, TABLE *table); @@ -433,7 +428,7 @@ bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value) POSITION::POSITION() { table= 0; - records_read= cond_selectivity= read_time= records_out= 0.0; + records_read= cond_selectivity= read_time= records_out= records_init= 0.0; prefix_record_count= 0.0; key= 0; forced_index= 0; @@ -1896,6 +1891,13 @@ int JOIN::optimize() res= build_explain(); optimization_state= JOIN::OPTIMIZATION_DONE; } + + /* + Store the cost of this query into a user variable + TODO: calculate a correct cost for a query with subqueries and UNIONs. + */ + if (select_lex->select_number == 1) + thd->status_var.last_query_cost= best_read; return res; } @@ -2045,6 +2047,7 @@ JOIN::optimize_inner() { DBUG_ENTER("JOIN::optimize_inner"); subq_exit_fl= false; + best_read= 0.0; DEBUG_SYNC(thd, "before_join_optimize"); THD_STAGE_INFO(thd, stage_optimizing); @@ -3588,7 +3591,7 @@ bool JOIN::make_aggr_tables_info() TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param, all_fields, NULL, distinct, - TRUE, select_options, HA_POS_ERROR, + TRUE, select_options, HA_ROWS_MAX, &empty_clex_str, !need_tmp, keep_row_order); if (!table) @@ -4233,7 +4236,7 @@ bool JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) { tab->filesort= - new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->keep_current_rowid, + new (thd->mem_root) Filesort(order, HA_ROWS_MAX, tab->keep_current_rowid, tab->select); if (!tab->filesort) return true; @@ -5270,7 +5273,6 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, DYNAMIC_ARRAY *keyuse_array) { int error= 0; - TABLE *UNINIT_VAR(table); /* inited in all loops */ uint i,table_count,const_count,key; uint sort_space; table_map found_const_table_map, all_table_map; @@ -5331,8 +5333,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (s= stat, i= 0; (tables= ti++); s++, i++) { TABLE_LIST *embedding= tables->embedding; + TABLE *table= tables->table; stat_vector[i]=s; - table_vector[i]=s->table=table=tables->table; + table_vector[i]= s->table= table; s->tab_list= tables; table->pos_in_table_list= tables; error= tables->fetch_number_of_rows(); @@ -5465,7 +5468,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (s= stat ; s < stat_end ; s++) { - table= s->table; + TABLE *table= s->table; for (JOIN_TAB *t= stat ; t < stat_end ; t++) { if (t->dependent & table->map) @@ -5569,7 +5572,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++) { - table=s->table; + TABLE *table= s->table; if (table->is_filled_at_execution()) continue; @@ -5622,7 +5625,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, (*s->on_expr_ref)->is_expensive())) { // system table int tmp= 0; - s->type=JT_SYSTEM; + s->type= JT_SYSTEM; join->const_table_map|=table->map; set_position(join,const_count++,s,(KEYUSE*) 0); if ((tmp= join_read_const_table(join->thd, s, @@ -5825,19 +5828,20 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->startup_cost= 0; if (s->type == JT_SYSTEM || s->type == JT_CONST) { - Json_writer_object table_records(thd); - /* Only one matching row */ - s->found_records= s->records= 1; - s->records_out= 1.0; + ha_rows records= 1; + if (s->type == JT_SYSTEM || s->table->file->stats.records == 0) + records= s->table->file->stats.records; + /* zero or one matching row */ + s->records= s->found_records= records; + s->records_init= s->records_out= rows2double(records); s->read_time=1.0; s->worst_seeks=1.0; - table_records.add_table_name(s) - .add("rows", s->found_records) - .add("cost", s->read_time) - .add("table_type", s->type == JT_CONST ? - "const" : - "system"); + table_records.add_table_name(s). + add("rows", s->found_records). + add("cost", s->read_time). + add("table_type", s->type == JT_CONST ? + "const" : "system"); continue; } /* @@ -5889,7 +5893,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->table->pos_in_table_list->is_materialized_derived())) // (3) { bool impossible_range= FALSE; - ha_rows records= HA_POS_ERROR; + ha_rows records= HA_ROWS_MAX; SQL_SELECT *select= 0; Item **sargable_cond= NULL; if (!s->const_keys.is_clear_all()) @@ -5956,6 +5960,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, } else { + double records= 1; join->const_table_map|= s->table->map; set_position(join,const_count++,s,(KEYUSE*) 0); s->type= JT_CONST; @@ -5966,7 +5971,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->info= ET_IMPOSSIBLE_ON_CONDITION; found_const_table_map|= s->table->map; mark_as_null_row(s->table); // All fields are NULL + records= 0; } + s->records_init= s->records_out= records; + s->found_records= s->records= (ha_rows)records; } } if (records != HA_POS_ERROR) @@ -6055,7 +6063,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (i= 0; i < join->table_count ; i++) if (double rr= join->best_positions[i].records_read) records= COST_MULT(records, rr); - rows= records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records; + rows= double_to_rows(records); set_if_smaller(rows, unit->lim.get_select_limit()); join->select_lex->increase_derived_records(rows); } @@ -7697,8 +7705,9 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) { join->positions[idx].table= table; join->positions[idx].key=key; - join->positions[idx].records_read=1.0; /* This is a const table */ - join->positions[idx].records_out=1.0; /* This is a const table */ + join->positions[idx].records_read=1.0; /* This is a const table */ + join->positions[idx].records_out=1.0; /* This is a const table */ + join->positions[idx].records_init=1.0; /* This is a const table */ join->positions[idx].cond_selectivity= 1.0; join->positions[idx].ref_depend_map= 0; @@ -7751,7 +7760,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) TODO: Extend with_found_constraint' to be set for a top level expression of type X=Y where X and Y has fields from current table and at least one field from - one o more previous tables. + one or more previous tables. @see also table_after_join_selectivity() produces selectivity of condition that is @@ -7851,37 +7860,29 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table, DBUG_ENTER("cost_for_index_read"); rows_adjusted= MY_MIN(rows2double(records), (double) thd->variables.max_seeks_for_key); + set_if_bigger(rows_adjusted, 1); + #ifdef OLD_CODE_LIMITED_SEEKS set_if_smaller(rows_adjusted, worst_seeks); #endif if (file->is_clustering_key(key)) { - cost.index_only_cost= file->ha_read_time(key, 1, (ha_rows)rows_adjusted); - /* - Same computation as in ha_read_and_copy_time() - We do it explicitely here as we want to use the original value of - records to compute the record copy cost. - */ - cost.read_cost= (cost.index_only_cost + - rows2double(records) * ROW_COPY_COST_THD(thd)); + cost.index_only_cost= + file->ha_keyread_clustered_and_copy_time(key, 1, rows_adjusted, 0); + /* There is no 'index_only_read' with a clustered index */ + cost.read_cost= cost.index_only_cost; } else if (table->covering_keys.is_set(key) && !table->no_keyread) { - cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows)rows_adjusted); + cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); /* Same computation as in ha_keyread_and_copy_time() */ cost.read_cost= (cost.index_only_cost + - rows2double(records) * KEY_COPY_COST_THD(thd)); + rows2double(records) * file->KEY_COPY_COST); } else { - cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows) rows_adjusted); - /* - Note that ha_read_time() + ..ROW_COPY_COST should be same - as ha_rnd_pos_time(). - */ - cost.read_cost= (cost.index_only_cost + - file->ha_read_time(key, 0, (ha_rows)rows_adjusted) + - rows2double(records) * ROW_COPY_COST_THD(thd)); + cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); + cost.read_cost= (cost.index_only_cost + file->ha_rnd_pos_time(records)); } DBUG_PRINT("statistics", ("index_cost: %.3f full_cost: %.3f", cost.index_only_cost, cost.read_cost)); @@ -7950,8 +7951,8 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, read even if selectivity (and thus new_records) would be very low. */ new_cost= (MY_MAX(cost_of_accepted_rows, - ranges * KEY_LOOKUP_COST * io_cost * - table->file->optimizer_cache_cost) + + ranges * table->file->KEY_LOOKUP_COST + + ranges * io_cost * table->file->DISK_READ_RATIO) + cost_of_rejected_rows + filter_lookup_cost); new_total_cost= ((new_cost + new_records * WHERE_COST_THD(thd)) * prev_records + filter_startup_cost); @@ -8015,6 +8016,24 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, None */ +struct best_plan +{ + double cost; // Smallest cost found + double records; // Old 'Records' + double records_read; // Records accessed + double records_after_filter; // Records_read + filter + double records_out; // Smallest record count seen + Range_rowid_filter_cost_info *filter; // Best filter + KEYUSE *key; // Best key + SplM_plan_info *spl_plan; + table_map ref_depends_map; + enum join_type type; + uint forced_index; + uint max_key_part; + bool uses_jbuf; +}; + + void best_access_path(JOIN *join, JOIN_TAB *s, @@ -8030,14 +8049,7 @@ best_access_path(JOIN *join, uint use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity; TABLE *table= s->table; - KEYUSE *best_key= 0; - uint best_max_key_part= 0; - uint best_forced_index= MAX_KEY, forced_index= MAX_KEY; my_bool found_constraint= 0; - double best_cost= DBL_MAX; - double records= DBL_MAX; - double records_out= table->stat_records() * table->cond_selectivity; - table_map best_ref_depends_map= 0; /* key_dependent is 0 if all key parts could be used or if there was an EQ_REF table found (which uses all key parts). In other words, we cannot @@ -8045,18 +8057,29 @@ best_access_path(JOIN *join, Otherwise it's a bitmap of tables that could improve key usage. */ table_map key_dependent= 0; - Range_rowid_filter_cost_info *best_filter= 0; double tmp; ha_rows rec; - bool best_uses_jbuf= FALSE; MY_BITMAP *eq_join_set= &s->table->eq_join_set; KEYUSE *hj_start_key= 0; - SplM_plan_info *spl_plan= 0; - enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN; Loose_scan_opt loose_scan_opt; + struct best_plan best; Json_writer_object trace_wrapper(thd, "best_access_path"); DBUG_ENTER("best_access_path"); + best.cost= DBL_MAX; + best.records= DBL_MAX; + best.records_read= DBL_MAX; + best.records_after_filter= DBL_MAX; + best.records_out= table->stat_records() * table->cond_selectivity; + best.filter= 0; + best.key= 0; + best.max_key_part= 0; + best.type= JT_UNKNOWN; + best.forced_index= MAX_KEY; + best.ref_depends_map= 0; + best.uses_jbuf= FALSE; + best.spl_plan= 0; + disable_jbuf= disable_jbuf || idx == join->const_tables; trace_wrapper.add_table_name(s); @@ -8066,7 +8089,7 @@ best_access_path(JOIN *join, loose_scan_opt.init(join, s, remaining_tables); if (table->is_splittable()) - spl_plan= s->choose_best_splitting(record_count, remaining_tables); + best.spl_plan= s->choose_best_splitting(record_count, remaining_tables); if (unlikely(thd->trace_started())) { @@ -8077,10 +8100,10 @@ best_access_path(JOIN *join, if (s->keyuse) { /* Use key if possible */ - KEYUSE *keyuse; - KEYUSE *start_key=0; - double best_records= DBL_MAX, index_only_cost= DBL_MAX; + KEYUSE *keyuse, *start_key= 0; + double index_only_cost= DBL_MAX; uint max_key_part=0; + enum join_type type= JT_UNKNOWN; /* Test how we can use keys */ rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key @@ -8102,7 +8125,7 @@ best_access_path(JOIN *join, key_part_map ref_or_null_part= 0; key_part_map all_parts= 0; double startup_cost= s->startup_cost; - double records_after_filter; + double records_after_filter, records_best_filter, records; Range_rowid_filter_cost_info *filter= 0; if (is_hash_join_key_no(key)) @@ -8333,7 +8356,6 @@ best_access_path(JOIN *join, ((double) (table->s->max_key_length-keyinfo->key_length) / (double) table->s->max_key_length))); set_if_smaller(records, (double)s->records); - set_if_smaller(records_out, records); if (records < 2.0) records=2.0; /* Can't be as good as a unique */ } @@ -8400,6 +8422,8 @@ best_access_path(JOIN *join, (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) || found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts))) { + double extra_cost= 0; + max_key_part= max_part_bit(found_part); /* ReuseRangeEstimateForRef-3: @@ -8524,7 +8548,7 @@ best_access_path(JOIN *join, a*keyinfo->user_defined_key_parts - rec_per_key)/ (keyinfo->user_defined_key_parts-1); else - records= a; + records= rows2double(s->records); set_if_bigger(records, MIN_ROWS_AFTER_FILTERING); } } @@ -8533,6 +8557,7 @@ best_access_path(JOIN *join, { /* We need to do two key searches to find row */ records *= 2.0; + extra_cost= s->table->file->KEY_LOOKUP_COST; } /* @@ -8562,13 +8587,14 @@ best_access_path(JOIN *join, } /* Limit the number of matched rows */ + set_if_smaller(records, (double) s->records); tmp= records; set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); INDEX_READ_COST cost= cost_for_index_read(thd, table, key, (ha_rows) tmp, (ha_rows) s->worst_seeks); tmp= cost.read_cost; - index_only_cost= cost.index_only_cost; + index_only_cost= cost.index_only_cost+extra_cost; } else { @@ -8590,7 +8616,7 @@ best_access_path(JOIN *join, if (records == DBL_MAX) // Key not usable continue; - records_after_filter= records; + records_best_filter= records_after_filter= records; /* Check that start_key->key can be used for index access @@ -8604,7 +8630,8 @@ best_access_path(JOIN *join, tmp, index_only_cost, record_count, - &records_out); + &records_best_filter); + set_if_smaller(best.records_out, records_best_filter); if (filter) filter= filter->apply_filter(thd, table, &tmp, &records_after_filter, &startup_cost, @@ -8625,20 +8652,31 @@ best_access_path(JOIN *join, The COST_EPS is here to ensure we use the first key if there are two 'identical keys' that could be used. */ - if (tmp + COST_EPS < best_cost) + if (tmp + COST_EPS < best.cost) { trace_access_idx.add("chosen", true); - best_cost= tmp; + best.cost= tmp; /* We use 'records' instead of 'records_after_filter' here as we want to have EXPLAIN print the number of rows found by the key access. */ - best_records= records; // Records before filter! - best_key= start_key; - best_max_key_part= max_key_part; - best_ref_depends_map= found_ref; - best_filter= filter; - best_type= type; + best.records= records; // Records before filter! + best.records_read= records; + /* + If we are using 'use_cond_selectivity > 1' then + table_after_join_selectivity() may take into account other + filters that what is currently used so we have to use + records_after_filter. If 'use_cond_selectivity <= 1 then we + can use information from the best filter. + */ + best.records_after_filter= ((use_cond_selectivity > 1) ? + records_after_filter : + records_best_filter); + best.key= start_key; + best.max_key_part= max_key_part; + best.ref_depends_map= found_ref; + best.filter= filter; + best.type= type; } else if (unlikely(thd->trace_started())) { @@ -8646,9 +8684,8 @@ best_access_path(JOIN *join, add("chosen", false). add("cause", cause ? cause : "cost"); } - set_if_smaller(records_out, records); + set_if_smaller(best.records_out, records); } /* for each key */ - records= best_records; } else { @@ -8671,7 +8708,7 @@ best_access_path(JOIN *join, /* Add dependency for sub queries */ key_dependent|= s->embedded_dependent; - } /* if (s->keyuse) */ + } /* if (s->keyuse) */ /* Check that s->key_dependent contains all used_tables found in s->keyuse */ @@ -8687,7 +8724,7 @@ best_access_path(JOIN *join, (1) s is inner table of semi-join -> join cache is allowed for semijoins (2) s is inner table of outer join -> join cache is allowed for outer joins */ - if (idx > join->const_tables && best_key == 0 && + if (idx > join->const_tables && best.key == 0 && (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && join->max_allowed_join_cache_level > 2 && !bitmap_is_clear_all(eq_join_set) && !disable_jbuf && @@ -8696,11 +8733,11 @@ best_access_path(JOIN *join, (!(table->map & join->outer_join) || join->allowed_outer_join_with_cache)) // (2) { - double refills, cmp_time; + double refills, row_copy_cost, cmp_time; /* Estimate the cost of the hash join access to the table */ - double rnd_records= matching_candidates_in_table(s, found_constraint, + double rnd_records= matching_candidates_in_table(s, 0, use_cond_selectivity); - set_if_smaller(records_out, rnd_records); + set_if_smaller(best.records_out, rnd_records); /* The following cost calculation is identical to the cost calculation for @@ -8729,18 +8766,22 @@ best_access_path(JOIN *join, We assume here that, thanks to the hash, we don't have to compare all row combinations, only a HASH_FANOUT (10%) rows in the cache. */ - cmp_time= (rnd_records * record_count * HASH_FANOUT * - (ROW_COPY_COST_THD(thd) * JOIN_CACHE_ROW_COPY_COST_FACTOR + + row_copy_cost= (ROW_COPY_COST_THD(thd) * 2 * + JOIN_CACHE_ROW_COPY_COST_FACTOR(thd)); + cmp_time= (record_count * row_copy_cost + + rnd_records * record_count * HASH_FANOUT * + ((idx - join->const_tables) * row_copy_cost + WHERE_COST_THD(thd))); tmp= COST_ADD(tmp, cmp_time); - best_cost= tmp; - records= rnd_records; - best_key= hj_start_key; - best_ref_depends_map= 0; - best_uses_jbuf= TRUE; - best_filter= 0; - best_type= JT_HASH; + best.cost= tmp; + best.records_read= best.records_after_filter= rows2double(s->records); + best.records= rnd_records; + best.key= hj_start_key; + best.ref_depends_map= 0; + best.uses_jbuf= TRUE; + best.filter= 0; + best.type= JT_HASH; Json_writer_object trace_access_hash(thd); if (unlikely(trace_access_hash.trace_started())) trace_access_hash. @@ -8748,7 +8789,7 @@ best_access_path(JOIN *join, add("index", "hj-key"). add("rows", rnd_records). add("refills", refills). - add("cost", best_cost). + add("cost", best.cost). add("chosen", true); } @@ -8788,21 +8829,25 @@ best_access_path(JOIN *join, be used for cases with small datasets, which is annoying. */ Json_writer_object trace_access_scan(thd); - if ((records >= s->found_records || best_cost > s->read_time) && // (1) - !(best_key && best_key->key == MAX_KEY) && // (2) + if ((best.records_read >= s->found_records || + best.cost > s->read_time) && // (1) + !(best.key && best.key->key == MAX_KEY) && // (2) !(s->quick && s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) - best_key && s->quick->index == best_key->key && // (2) - best_max_key_part >= table->opt_range[best_key->key].key_parts) &&// (2) - !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) - !table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) - !(table->force_index_join && best_key && !s->quick) && // (4) - !(best_key && table->pos_in_table_list->jtbm_subselect)) // (5) + best.key && s->quick->index == best.key->key && // (2) + best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2) + !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) + !table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3) + !(table->force_index_join && best.key && !s->quick) && // (4) + !(best.key && table->pos_in_table_list->jtbm_subselect)) // (5) { // Check full join - double rnd_records, records_after_filter, org_records; + double records_after_filter, org_records; + double records_best_filter; Range_rowid_filter_cost_info *filter= 0; double startup_cost= s->startup_cost; const char *scan_type= ""; + enum join_type type; + uint forced_index= MAX_KEY; /* Range optimizer never proposes a RANGE if it isn't better @@ -8832,7 +8877,8 @@ best_access_path(JOIN *join, This is done to make records found comparable to what we get with 'ref' access. */ - org_records= records_after_filter= rnd_records= rows2double(s->found_records); + org_records= records_after_filter= rows2double(s->found_records); + records_best_filter= org_records; if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { @@ -8850,11 +8896,13 @@ best_access_path(JOIN *join, range->cost / s->quick->read_time >= 0.9999999)); filter= - table->best_range_rowid_filter_for_partial_join(key_no, rows2double(range->rows), + table->best_range_rowid_filter_for_partial_join(key_no, + rows2double(range->rows), range->find_cost, range->index_only_cost, record_count, - &records_out); + &records_best_filter); + set_if_smaller(best.records_out, records_best_filter); if (filter) { double filter_cost= range->fetch_cost; @@ -8883,20 +8931,18 @@ best_access_path(JOIN *join, { type= JT_INDEX_MERGE; } - set_if_smaller(records_out, records_after_filter); loose_scan_opt.check_range_access(join, idx, s->quick); } else { /* We will now calculate cost of scan, with or without join buffer */ - rnd_records= matching_candidates_in_table(s, found_constraint, - use_cond_selectivity); - records_after_filter= rnd_records; - set_if_smaller(records_out, rnd_records); + records_after_filter= matching_candidates_in_table(s, 0, + use_cond_selectivity); + DBUG_ASSERT(records_after_filter <= s->records); - org_records= rows2double(s->records); + set_if_smaller(best.records_out, records_after_filter); - DBUG_ASSERT(rnd_records <= s->records); + org_records= rows2double(s->records); /* Estimate cost of reading table. */ if (s->cached_forced_index_type) @@ -8907,7 +8953,7 @@ best_access_path(JOIN *join, } else { - if (table->force_index_join && !best_key) + if (table->force_index_join && !best.key) { /* The query is using 'forced_index' and we did not find a usable key. @@ -8951,6 +8997,7 @@ best_access_path(JOIN *join, tmp= s->cached_scan_and_compare_time; type= JT_ALL; } + /* Cache result for other calls */ s->cached_forced_index_type= type; s->cached_forced_index_cost= tmp; s->cached_forced_index= forced_index; @@ -8977,7 +9024,7 @@ best_access_path(JOIN *join, else { /* Scan trough join cache */ - double cmp_time, refills; + double cmp_time, row_copy_cost, refills; /* Calculate cost of checking the the WHERE for this table. @@ -8995,13 +9042,16 @@ best_access_path(JOIN *join, /* We come here only if there are already rows in the join cache */ DBUG_ASSERT(idx != join->const_tables); /* - Cost of moving each row from each previous table from the join cache - to it's table record and comparing it with the found and accepted - row. + Cost of: + - Copying all previous record combinations to the join cache + - Copying the tables from the join cache to table records + - Checking the WHERE against the final row combination */ - cmp_time= (rnd_records * record_count * - (ROW_COPY_COST_THD(thd) * (idx - join->const_tables) * - JOIN_CACHE_ROW_COPY_COST_FACTOR + + row_copy_cost= (ROW_COPY_COST_THD(thd) * + JOIN_CACHE_ROW_COPY_COST_FACTOR(thd)); + cmp_time= (record_count * row_copy_cost + + records_after_filter * record_count * + ((idx - join->const_tables) * row_copy_cost + WHERE_COST_THD(thd))); tmp= COST_ADD(tmp, cmp_time); } @@ -9017,10 +9067,10 @@ best_access_path(JOIN *join, trace_access_scan. add("access_type", type == JT_ALL ? scan_type : join_type_str[type]). - add("rows", org_records). - add("rows_after_scan", rnd_records). - add("rows_after_filter", records_after_filter). - add("cost", tmp); + add("rows", org_records). + add("rows_after_filter", records_after_filter). + add("rows_out", best.records_out). + add("cost", tmp); if (type == JT_ALL) { trace_access_scan.add("index_only", @@ -9028,27 +9078,38 @@ best_access_path(JOIN *join, } } - if (tmp + COST_EPS < best_cost) + if (tmp + COST_EPS < best.cost) { /* If the table has a range (s->quick is set) make_join_select() will ensure that this will be used */ - best_cost= tmp; - records= rnd_records; - best_key= 0; - best_forced_index= forced_index; + best.cost= tmp; + best.records_read= org_records; // Records accessed + best.records= records_after_filter; // Records to be checked with WHERE + /* + If we are using 'use_cond_selectivity > 1' then + table_after_join_selectivity may take into account other + filters that what is currently used so we have to use + records_after_filter. If 'use_cond_selectivity <= 1 then we + can use information from the best filter. + */ + best.records_after_filter= ((use_cond_selectivity > 1) ? + records_after_filter : + records_best_filter); + best.key= 0; + best.forced_index= forced_index; /* filter is only set if s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE */ - best_filter= filter; + best.filter= filter; /* range/index_merge/ALL/index access method are "independent", so: */ - best_ref_depends_map= 0; - best_uses_jbuf= MY_TEST(!disable_jbuf && !((table->map & + best.ref_depends_map= 0; + best.uses_jbuf= MY_TEST(!disable_jbuf && !((table->map & join->outer_join))); - spl_plan= 0; - best_type= type; + best.spl_plan= 0; + best.type= type; trace_access_scan.add("chosen", true); } else @@ -9063,29 +9124,33 @@ best_access_path(JOIN *join, add("cause", "cost"); } + crash_if_first_double_is_bigger(best.records_out, best.records); + crash_if_first_double_is_bigger(best.records_out, best.records_read); + /* Update the cost information for the current partial plan */ - crash_if_first_double_is_bigger(records_out, records); - pos->records_read= records; - pos->records_out= records_out; - pos->read_time= best_cost; - pos->key= best_key; - pos->forced_index= best_forced_index; - pos->type= best_type; + pos->records_init= best.records_read; + pos->records_after_filter= best.records_after_filter; + pos->records_read= best.records; + pos->records_out= best.records_out; + pos->read_time= best.cost; + pos->key= best.key; + pos->forced_index= best.forced_index; + pos->type= best.type; pos->table= s; - pos->ref_depend_map= best_ref_depends_map; + pos->ref_depend_map= best.ref_depends_map; pos->loosescan_picker.loosescan_key= MAX_KEY; - pos->use_join_buffer= best_uses_jbuf; - pos->spl_plan= spl_plan; - pos->range_rowid_filter_info= best_filter; - pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 : + pos->use_join_buffer= best.uses_jbuf; + pos->spl_plan= best.spl_plan; + pos->range_rowid_filter_info= best.filter; + pos->key_dependent= (best.type == JT_EQ_REF ? (table_map) 0 : key_dependent & remaining_tables); loose_scan_opt.save_to_position(s, loose_scan_pos); - if (!best_key && - idx == join->const_tables && + if (!best.key && + idx == join->const_tables && // First table table == join->sort_by_table && - join->unit->lim.get_select_limit() >= records) + join->unit->lim.get_select_limit() >= best.records) // QQQ Why? { trace_access_scan.add("use_tmp_table", true); join->sort_by_table= (TABLE*) 1; // Must use temporary table @@ -9320,15 +9385,6 @@ choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest) DBUG_RETURN(TRUE); } - /* - Store the cost of this query into a user variable - Don't update last_query_cost for statements that are not "flat joins" : - i.e. they have subqueries, unions or call stored procedures. - TODO: calculate a correct cost for a query with subqueries and UNIONs. - */ - if (join->thd->lex->is_single_level_stmt()) - join->thd->status_var.last_query_cost= join->best_read; - join->emb_sjm_nest= 0; DBUG_RETURN(FALSE); } @@ -9595,6 +9651,8 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) { POSITION *position= join->positions + idx; Json_writer_object trace_one_table(thd); + double original_record_count, current_record_count; + if (unlikely(thd->trace_started())) trace_plan_prefix(join, idx, remaining_tables); /* Find the best access method from 's' to the current partial plan */ @@ -9603,22 +9661,71 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) position, &loose_scan_pos); /* Compute the cost of the new plan extended with 's' */ - record_count= COST_MULT(record_count, position->records_read); + current_record_count= COST_MULT(record_count, position->records_out); read_time= COST_ADD(read_time, position->read_time); - optimize_semi_joins(join, remaining_tables, idx, &record_count, &read_time, - &loose_scan_pos); + original_record_count= current_record_count; + optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count, + &read_time, &loose_scan_pos); + if (position->sj_strategy != SJ_OPT_NONE && original_record_count) + { + /* Adjust records_out to contain the final number of rows */ + double ratio= current_record_count / original_record_count; + /* QQQ This is just to stop an assert later */ + if (ratio < 1) + position->records_out*= ratio; + } + remaining_tables&= ~(s->table->map); - double pushdown_cond_selectivity= 1.0; - if (use_cond_selectivity > 1) + if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE) + { + double pushdown_cond_selectivity, records_out; pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s, - remaining_tables); - position->cond_selectivity= pushdown_cond_selectivity; + remaining_tables, + &records_out); + if (unlikely(thd->trace_started()) && + pushdown_cond_selectivity != 1.0) + { + trace_one_table. + add("pushdown_cond_selectivity", pushdown_cond_selectivity). + add("rows_out", records_out); + } + position->cond_selectivity= pushdown_cond_selectivity; + position->records_out= records_out; + current_record_count= COST_MULT(record_count, records_out); + } + else + position->cond_selectivity= 1.0; ++idx; + record_count= current_record_count; } if (join->sort_by_table && join->sort_by_table != join->positions[join->const_tables].table->table) - read_time+= record_count; // We have to make a temp table + { + /* + We may have to make a temp table, note that this is only a + heuristic since we cannot know for sure at this point if we + we are going to use addon fields or to have flush sorting to + disk. We also don't know the temporary table will be in memory + or disk. + The following calculation takes a middle ground where assume + we can sort the keys in memory but have to use a disk based + temporary table to retrive the rows. + This cost is probably much bigger than it has to be... + */ + double sort_cost; + sort_cost= (get_qsort_sort_cost((ha_rows)record_count, 0) + + record_count * + DISK_TEMPTABLE_LOOKUP_COST(thd)); + { + if (unlikely(thd->trace_started())) + { + Json_writer_object trace_one_table(thd); + trace_one_table.add("estimated_cost_for_sorting", sort_cost); + } + } + read_time= COST_ADD(read_time, sort_cost); + } memcpy((uchar*) join->best_positions, (uchar*) join->positions, sizeof(POSITION)*idx); join->join_record_count= record_count; @@ -9997,8 +10104,7 @@ double JOIN::get_examined_rows() COST_MULT((double) (tab->get_examined_rows()), prev_fanout)); prev_tab= tab; } - examined_rows= (double) - (records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records); + examined_rows= double_to_rows(records); return examined_rows; } @@ -10129,9 +10235,10 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, @brief Get the selectivity of conditions when joining a table - @param join The optimized join - @param s The table to be joined for evaluation - @param rem_tables The bitmap of tables to be joined later + @param join The optimized join + @param s The table to be joined for evaluation + @param rem_tables The bitmap of tables to be joined later + @param new_records_out OUT Set to number of rows accepted @detail Get selectivity of conditions that can be applied when joining this table @@ -10145,12 +10252,14 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, condition, "COND(this_table) AND COND(this_table, previous_tables)". @retval - selectivity of the conditions imposed on the rows of s + selectivity of the conditions imposed on the rows of s related to + the rows that we are expected to read (position->records_init). */ static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, - table_map rem_tables) + table_map rem_tables, + double *new_records_out) { uint16 ref_keyuse_steps_buf[MAX_REF_PARTS]; uint ref_keyuse_size= MAX_REF_PARTS; @@ -10158,13 +10267,14 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, Field *field; TABLE *table= s->table; MY_BITMAP *read_set= table->read_set; - double sel= table->cond_selectivity; POSITION *pos= &join->positions[idx]; + double sel, records_out= pos->records_out; uint keyparts= 0; uint found_part_ref_or_null= 0; if (pos->key != 0) { + sel= table->cond_selectivity; /* A ref access or hash join is used for this table. ref access is created from @@ -10338,35 +10448,22 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, } keyuse++; } - } - else - { /* - The table is accessed with full table scan, or quick select. - Selectivity of COND(table) is already accounted for in - matching_candidates_in_table(). - */ - sel= 1.0; - } + If the field f from the table is equal to a field from one the + earlier joined tables then the selectivity of the range conditions + over the field f must be discounted. - /* - If the field f from the table is equal to a field from one the - earlier joined tables then the selectivity of the range conditions - over the field f must be discounted. - - We need to discount selectivity only if we're using ref-based - access method (and have sel!=1). - If we use ALL/range/index_merge, then sel==1, and no need to discount. - */ - if (pos->key != NULL) - { + We need to discount selectivity only if we're using ref-based + access method (and have sel!=1). + If we use ALL/range/index_merge, then sel==1, and no need to discount. + */ for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) { if (!bitmap_is_set(read_set, field->field_index) || !field->next_equal_field) - continue; - for (Field *next_field= field->next_equal_field; - next_field != field; + continue; + for (Field *next_field= field->next_equal_field; + next_field != field; next_field= next_field->next_equal_field) { if (!(next_field->table->map & rem_tables) && @@ -10381,14 +10478,39 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, } } } + /* + We have now calculated a more exact 'records_out' taking more index + costs into account. + pos->records_out previously contained the smallest record count for + all range or ref access, which should not be smaller than what we + calculated above. + */ + records_out= pos->records_after_filter * sel; + set_if_smaller(records_out, pos->records_out); } - sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, + sel= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, keyparts, ref_keyuse_steps); + records_out*= sel; + + /* + Update sel to be relative pos->records_read as that is what some old + code expects. Newer code should just use 'position->records_out' instead. + */ + if (pos->records_read == 0) + sel= 1.0; + else + { + sel= records_out / pos->records_read; + DBUG_ASSERT(sel >= 0.0 and sel <= 1.00001); + if (sel > 1.0) + sel= 1.0; + } + exit: + *new_records_out= records_out; if (ref_keyuse_steps != ref_keyuse_steps_buf) my_free(ref_keyuse_steps); - DBUG_ASSERT(sel >= 0.0 and sel <= 1.0); return sel; } @@ -10407,7 +10529,7 @@ check_if_edge_table(POSITION *pos, if ((pos->type == JT_EQ_REF || (pos->type == JT_REF && - pos->records_read == 1 && + pos->records_init == 1 && !pos->range_rowid_filter_info)) && pushdown_cond_selectivity >= 0.999) return SEARCH_FOUND_EDGE; @@ -10600,7 +10722,7 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx, // pplan_cost already too great, stop search continue; - pplan= expand pplan by best_access_method; + pplan= expand plan by best_access_method; remaining_tables= remaining_tables - table T; if (remaining_tables is not an empty set and @@ -10671,8 +10793,8 @@ best_extension_by_limited_search(JOIN *join, { THD *thd= join->thd; /* - 'join' is a partial plan with lower cost than the best plan so far, - so continue expanding it further with the tables in 'remaining_tables'. + 'join' is a partial plan with lower cost than the best plan so far, + so continue expanding it further with the tables in 'remaining_tables'. */ JOIN_TAB *s; double best_record_count= DBL_MAX; @@ -10689,14 +10811,14 @@ best_extension_by_limited_search(JOIN *join, if (dbug_user_var_equals_int(thd, "show_explain_probe_select_id", join->select_lex->select_number)) - dbug_serve_apcs(thd, 1); - ); + dbug_serve_apcs(thd, 1); + ); if (unlikely(thd->check_killed())) // Abort DBUG_RETURN(SEARCH_ABORT); DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time, - "part_plan");); + "part_plan");); status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls); if (join->emb_sjm_nest) @@ -10785,7 +10907,7 @@ best_extension_by_limited_search(JOIN *join, !check_interleaving_with_nj(s)) { table_map real_table_bit= s->table->map; - double current_record_count, current_read_time; + double current_record_count, current_read_time, original_record_count; double partial_join_cardinality; POSITION *position= join->positions + idx, *loose_scan_pos; double pushdown_cond_selectivity; @@ -10802,7 +10924,7 @@ best_extension_by_limited_search(JOIN *join, loose_scan_pos= pos->position+1; /* Compute the cost of the new plan extended with 's' */ - current_record_count= COST_MULT(record_count, position->records_read); + current_record_count= COST_MULT(record_count, position->records_out); current_read_time= COST_ADD(read_time, position->read_time); if (unlikely(trace_one_table.trace_started())) @@ -10811,9 +10933,22 @@ best_extension_by_limited_search(JOIN *join, add("rows_for_plan", current_record_count). add("cost_for_plan", current_read_time); } + original_record_count= current_record_count; optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count, ¤t_read_time, loose_scan_pos); - + if (position->sj_strategy != SJ_OPT_NONE) + { + /* Adjust records_out and current_record_count after semi join */ + double ratio= current_record_count / original_record_count; + /* QQQ This is just to stop an assert later */ + if (ratio < 1.0) + position->records_out*= ratio; + if (unlikely(trace_one_table.trace_started())) + { + trace_one_table.add("sj_rows_out", position->records_out); + trace_one_table.add("sj_rows_for_plan", current_record_count); + } + } /* Expand only partial plans with lower cost than the best QEP so far */ if (current_read_time + COST_EPS >= join->best_read) { @@ -10864,15 +10999,15 @@ best_extension_by_limited_search(JOIN *join, if (best_record_count > current_record_count || best_read_time > current_read_time || (idx == join->const_tables && // 's' is the first table in the QEP - s->table == join->sort_by_table)) + s->table == join->sort_by_table)) { /* Store the current record count and cost as the best possible cost at this level if the following holds: - It's the lowest record number and cost so far - - There is no remaing table that could improve index usage - or we found an EQ_REF or REF key with less than 2 - matching records (good enough). + - There is no remaing table that could improve index usage + or we found an EQ_REF or REF key with less than 2 + matching records (good enough). */ if (best_record_count >= current_record_count && best_read_time >= current_read_time && @@ -10924,17 +11059,26 @@ best_extension_by_limited_search(JOIN *join, } pushdown_cond_selectivity= 1.0; - if (use_cond_selectivity > 1) + /* + TODO: When a semi-join strategy is applied (sj_strategy!=SJ_OPT_NONE), + we should account for selectivity from table_after_join_selectivity(). + (Condition filtering is performed before the semi-join removes some + fanout so this might require moving the code around) + */ + if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE) + { pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s, - remaining_tables & ~real_table_bit); + remaining_tables & ~real_table_bit, + &position->records_out); + } join->positions[idx].cond_selectivity= pushdown_cond_selectivity; - partial_join_cardinality= (current_record_count * - pushdown_cond_selectivity); + partial_join_cardinality= record_count * position->records_out; - if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0) + if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0 && + partial_join_cardinality < current_record_count) trace_one_table .add("selectivity", pushdown_cond_selectivity) .add("estimated_join_cardinality", partial_join_cardinality); @@ -10979,11 +11123,21 @@ best_extension_by_limited_search(JOIN *join, { /* We may have to make a temp table, note that this is only a - heuristic since we cannot know for sure at this point. - Hence it may be wrong. + heuristic since we cannot know for sure at this point if we + we are going to use addon fields or to have flush sorting to + disk. We also don't know the temporary table will be in memory + or disk. + The following calculation takes a middle ground where assume + we can sort the keys in memory but have to use a disk based + temporary table to retrive the rows. + This cost is probably much bigger than it has to be... */ - trace_one_table.add("cost_for_sorting", current_record_count); - current_read_time= COST_ADD(current_read_time, current_record_count); + double sort_cost; + sort_cost= (get_qsort_sort_cost((ha_rows)current_record_count,0) + + current_record_count * + DISK_TEMPTABLE_LOOKUP_COST(thd)); + trace_one_table.add("cost_for_sorting", sort_cost); + current_read_time= COST_ADD(current_read_time, sort_cost); } if (current_read_time < join->best_read) { @@ -11318,11 +11472,8 @@ prev_record_reads(const POSITION *positions, uint idx, table_map found_ref) is an inprecise estimate and adding 1 (or, in the worst case, #max_nested_outer_joins=64-1) will not make it any more precise. */ - if (pos->records_read) - { - found= COST_MULT(found, pos->records_read); - found*= pos->cond_selectivity; - } + if (pos->records_out) + found= COST_MULT(found, pos->records_out); } } return found; @@ -11752,7 +11903,7 @@ bool JOIN::get_best_combination() */ SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0); - j->records_out= j->records_read; + j->records_init= j->records_out= j->records_read; j->records= (ha_rows) j->records_read; j->cond_selectivity= 1.0; JOIN_TAB *jt; @@ -11787,6 +11938,7 @@ bool JOIN::get_best_combination() if (j->type == JT_SYSTEM) goto loop_end; + if (!(keyuse= cur_pos->key)) { if (cur_pos->type == JT_NEXT) // Forced index @@ -11807,17 +11959,19 @@ bool JOIN::get_best_combination() j->range_rowid_filter_info= cur_pos->range_rowid_filter_info; - loop_end: - /* + /* Save records_read in JOIN_TAB so that select_describe()/etc don't have to access join->best_positions[]. */ + j->records_init= cur_pos->records_init; j->records_read= cur_pos->records_read; j->records_out= cur_pos->records_out; + + loop_end: j->cond_selectivity= cur_pos->cond_selectivity; DBUG_ASSERT(j->cond_selectivity <= 1.0); crash_if_first_double_is_bigger(j->records_out, - j->records_read * + j->records_init * (j->range_rowid_filter_info ? j->range_rowid_filter_info->selectivity : 1.0)); @@ -12580,7 +12734,10 @@ make_outerjoin_info(JOIN *join) { if (embedding->is_active_sjm()) { - /* We're trying to walk out of an SJ-Materialization nest. Don't do this. */ + /* + We're trying to walk out of an SJ-Materialization nest. + Don't do this. + */ break; } /* Ignore sj-nests: */ @@ -12861,8 +13018,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tab->use_quick=1; tab->ref.key= -1; tab->ref.key_parts=0; // Don't use ref key. - join->best_positions[i].records_read= rows2double(tab->quick->records); - /* + join->best_positions[i].records_read= + join->best_positions[i].records_out= + rows2double(tab->quick->records); + /* We will use join cache here : prevent sorting of the first table only and sort at the end. */ @@ -14906,14 +15065,14 @@ void JOIN_TAB::cleanup() /** Estimate the time to get rows of the joined table - Updates found_records, records, cached_scan_time, cached_covering_key, - read_time and cache_scan_and_compare_time + Updates found_records, records, cached_covering_key, read_time and + cache_scan_and_compare_time */ void JOIN_TAB::estimate_scan_time() { THD *thd= join->thd; - double copy_cost= ROW_COPY_COST_THD(thd); + double copy_cost; cached_covering_key= MAX_KEY; if (table->is_created()) @@ -14924,6 +15083,7 @@ void JOIN_TAB::estimate_scan_time() &startup_cost); table->opt_range_condition_rows= records; table->used_stat_records= records; + copy_cost= table->file->ROW_COPY_COST; } else { @@ -14937,21 +15097,38 @@ void JOIN_TAB::estimate_scan_time() if (!table->covering_keys.is_clear_all() && ! table->no_keyread) { cached_covering_key= find_shortest_key(table, &table->covering_keys); - read_time= table->file->ha_key_scan_time(cached_covering_key); - copy_cost= KEY_COPY_COST_THD(thd); + read_time= table->file->ha_key_scan_time(cached_covering_key, records); + copy_cost= 0; // included in ha_key_scan_time } else - read_time= table->file->ha_scan_time(); + { + read_time= table->file->ha_scan_time(records); + copy_cost= 0; + } } } else { + /* + The following is same as calling + TABLE_SHARE::update_optimizer_costs, but without locks + */ + if (table->s->db_type() == heap_hton) + memcpy(&table->s->optimizer_costs, &heap_optimizer_costs, + sizeof(heap_optimizer_costs)); + else + memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs, + sizeof(tmp_table_optimizer_costs)); + table->file->set_optimizer_costs(thd); + table->s->optimizer_costs_inited=1 ; + records= table->stat_records(); DBUG_ASSERT(table->opt_range_condition_rows == records); - read_time= records ? (double) records: 10.0;// TODO:fix this stub + read_time= table->file->ha_scan_time(MY_MAX(records, 1000)); // Needs fix.. + copy_cost= table->s->optimizer_costs.row_copy_cost; } + found_records= records; - cached_scan_time= read_time; cached_scan_and_compare_time= (read_time + records * (copy_cost + WHERE_COST_THD(thd))); } @@ -14996,7 +15173,7 @@ ha_rows JOIN_TAB::get_examined_rows() } } else - examined_rows= records_read; + examined_rows= records_init; if (examined_rows >= (double) HA_ROWS_MAX) return HA_ROWS_MAX; @@ -18496,7 +18673,7 @@ table_map JOIN::get_allowed_nj_tables(uint idx) first_alt TRUE <=> Use the LooseScan plan for the first_tab no_jbuf_before Don't allow to use join buffering before this table - reopt_rec_count OUT New output record count + outer_rec_count OUT New output record count reopt_cost OUT New join prefix cost DESCRIPTION @@ -18551,6 +18728,8 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; join->cur_sj_inner_tables= 0; + double inner_fanout= 1.0; + for (i= first_tab; i <= last_tab; i++) { JOIN_TAB *rs= join->positions[i].table; @@ -18563,31 +18742,43 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, join->positions, i, TRUE, rec_count, &pos, &loose_scan_pos); + if ((i == first_tab && first_alt)) + pos= loose_scan_pos; } else pos= join->positions[i]; - if ((i == first_tab && first_alt)) - pos= loose_scan_pos; - reopt_remaining_tables &= ~rs->table->map; - rec_count= COST_MULT(rec_count, pos.records_read); cost= COST_ADD(cost, pos.read_time); - //TODO: take into account join condition selectivity here - double pushdown_cond_selectivity= 1.0; - table_map real_table_bit= rs->table->map; - if (join->thd->variables.optimizer_use_condition_selectivity > 1) + + double records_out= pos.records_out; + /* + The (i != last_tab) is here to mimic what + best_extension_by_limited_search() does: do not call + table_after_join_selectivity() for the join_tab where the semi-join + strategy is applied + */ + if (i != last_tab && + join->thd->variables.optimizer_use_condition_selectivity > 1) { + table_map real_table_bit= rs->table->map; + double __attribute__((unused)) pushdown_cond_selectivity; pushdown_cond_selectivity= table_after_join_selectivity(join, i, rs, reopt_remaining_tables & - ~real_table_bit); + ~real_table_bit, &records_out); } - (*outer_rec_count) *= pushdown_cond_selectivity; - if (!rs->emb_sj_nest) - *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read); + rec_count= COST_MULT(rec_count, records_out); + *outer_rec_count= COST_MULT(*outer_rec_count, records_out); + if (rs->emb_sj_nest) + inner_fanout= COST_MULT(inner_fanout, records_out); } + + /* Discount the fanout produced by the subquery */ + if (inner_fanout > 1.0) + *outer_rec_count /= inner_fanout; + join->cur_sj_inner_tables= save_cur_sj_inner_tables; *reopt_cost= cost; @@ -20828,7 +21019,7 @@ TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM *param, { TABLE *table; Create_tmp_table maker((ORDER *) NULL, false, false, - select_options, HA_POS_ERROR); + select_options, HA_ROWS_MAX); if (!(table= maker.start(thd, param, &table_alias)) || maker.add_schema_fields(thd, table, param, schema_table) || maker.finalize(thd, table, param, do_not_open, keep_row_order)) @@ -21008,7 +21199,6 @@ bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value) return false; } - bool open_tmp_table(TABLE *table) { int error; @@ -21022,6 +21212,7 @@ bool open_tmp_table(TABLE *table) } table->db_stat= HA_OPEN_KEYFILE; (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */ + table->file->set_optimizer_costs(table->in_use); if (!table->is_created()) { table->set_created(); @@ -24702,31 +24893,40 @@ ok: @return MAX_KEY no suitable key found key index otherwise + + @notes + We should not use keyread_time() as in the case of disk_read_cost= 0 + all keys would be regarded equal. */ uint find_shortest_key(TABLE *table, const key_map *usable_keys) { - double min_cost= DBL_MAX; + size_t min_length= INT_MAX32; uint best= MAX_KEY; - if (!usable_keys->is_clear_all()) + uint possible_keys= usable_keys->bits_set(); + + if (possible_keys) { + if (possible_keys == 1) + return usable_keys->find_first_bit(); + for (uint nr=0; nr < table->s->keys ; nr++) { if (usable_keys->is_set(nr)) { - double cost= table->file->ha_key_scan_time(nr); - if (cost < min_cost) + size_t length= table->key_storage_length(nr); + if (length < min_length) { - min_cost= cost; - best=nr; + min_length= length; + best= nr; } - DBUG_ASSERT(best < MAX_KEY); } } } return best; } + /** Test if a second key is the subkey of the first one. @@ -28244,6 +28444,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, // psergey-todo: data for filtering! tracker= &eta->tracker; jbuf_tracker= &eta->jbuf_tracker; + jbuf_unpack_tracker= &eta->jbuf_unpack_tracker; /* Enable the table access time tracker only for "ANALYZE stmt" */ if (thd->lex->analyze_stmt) @@ -28472,12 +28673,13 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, ha_rows examined_rows= get_examined_rows(); eta->rows_set= true; - eta->rows= examined_rows; + eta->rows= double_to_rows(examined_rows); /* "filtered" */ float f= 0.0; if (examined_rows) { +#ifdef OLD_CODE // QQQ double pushdown_cond_selectivity= cond_selectivity; if (pushdown_cond_selectivity != 1.0) f= (float) (100.0 * pushdown_cond_selectivity); @@ -28485,6 +28687,9 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, f= (float) (100.0 * range_rowid_filter_info->selectivity); else f= (float) (100.0 * records_read / examined_rows); +#else + f= (float) (100.0 * records_out / examined_rows); +#endif } set_if_smaller(f, 100.0); eta->filtered_set= true; @@ -28880,9 +29085,9 @@ int JOIN::save_explain_data_intern(Explain_query *output, continue; } - Explain_table_access *eta= (new (output->mem_root) - Explain_table_access(output->mem_root)); + Explain_table_access(output->mem_root, + thd->lex->analyze_stmt)); if (!eta) DBUG_RETURN(1); @@ -29922,7 +30127,7 @@ void JOIN::cache_const_exprs() - If there is no quick select return the full cost from cost_for_index_read() (Doing a full scan with up to 'limit' records) - @param pos Result from best_acccess_path(). Is NULL for + @param pos Result from best_access_path(). Is NULL for single-table UPDATE/DELETE @param table Table to be sorted @param keynr Which index to use @@ -30008,7 +30213,7 @@ static bool get_range_limit_read_cost(const POSITION *pos, /* Calculate the number of rows we have to check if we are - doing a full index scan (as a suitabe range scan was not available). + doing a full index scan (as a suitable range scan was not available). We assume that each of the tested indexes is not correlated with ref_key. Thus, to select first N records we have to scan @@ -30197,12 +30402,12 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, trace_cheaper_ordering.add_table_name(tab); else trace_cheaper_ordering.add_table_name(table); - trace_cheaper_ordering - .add("rows_estimation", rows_estimate) - .add("read_cost", read_time) - .add("filesort_cost", filesort_cost) - .add("filesort_type", filesort_names[filesort_type].str) - .add("fanout", fanout); + trace_cheaper_ordering. + add("rows_estimation", rows_estimate). + add("filesort_cost", filesort_cost). + add("read_cost", read_time). + add("filesort_type", filesort_names[filesort_type].str). + add("fanout", fanout); } Json_writer_array possible_keys(thd,"possible_keys"); |