summaryrefslogtreecommitdiff
path: root/sql/sql_select.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r--sql/sql_select.cc769
1 files changed, 487 insertions, 282 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 0532c6c000c..9a1dfd83508 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -47,6 +47,7 @@
// print_sjm, print_plan, TEST_join
#include "records.h" // init_read_record, end_read_record
#include "filesort.h" // filesort_free_buffers
+#include "filesort_utils.h" // get_qsort_sort_cost
#include "sql_union.h" // mysql_union
#include "opt_subselect.h"
#include "sql_derived.h"
@@ -68,6 +69,7 @@
#include "my_json_writer.h"
#include "opt_trace.h"
#include "create_tmp_table.h"
+#include "optimizer_defaults.h"
/*
A key part number that means we're using a fulltext scan.
@@ -99,14 +101,7 @@
#define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) < 1.0000001)
-#define double_to_rows(A) ((A) >= ((double)HA_POS_ERROR) ? HA_POS_ERROR : (ha_rows) (A))
-
-/* Cost for reading a row through an index */
-struct INDEX_READ_COST
-{
- double read_cost;
- double index_only_cost;
-};
+#define double_to_rows(A) ((A) >= ((double)HA_ROWS_MAX) ? HA_ROWS_MAX : (ha_rows) (A))
const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
"MAYBE_REF","ALL","range","index","fulltext",
@@ -257,7 +252,6 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
bool is_top_and_level);
static Item* part_of_refkey(TABLE *form,Field *field);
-uint find_shortest_key(TABLE *table, const key_map *usable_keys);
static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
ORDER *order, TABLE *table,
key_map usable_keys, int key,
@@ -331,7 +325,8 @@ static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *,
List<Item> &, List<Item> &, bool, bool, bool);
static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
- table_map rem_tables);
+ table_map rem_tables,
+ double *records_out);
void set_postjoin_aggr_write_func(JOIN_TAB *tab);
static Item **get_sargable_cond(JOIN *join, TABLE *table);
@@ -433,7 +428,7 @@ bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value)
POSITION::POSITION()
{
table= 0;
- records_read= cond_selectivity= read_time= records_out= 0.0;
+ records_read= cond_selectivity= read_time= records_out= records_init= 0.0;
prefix_record_count= 0.0;
key= 0;
forced_index= 0;
@@ -1896,6 +1891,13 @@ int JOIN::optimize()
res= build_explain();
optimization_state= JOIN::OPTIMIZATION_DONE;
}
+
+ /*
+ Store the cost of this query into a user variable
+ TODO: calculate a correct cost for a query with subqueries and UNIONs.
+ */
+ if (select_lex->select_number == 1)
+ thd->status_var.last_query_cost= best_read;
return res;
}
@@ -2045,6 +2047,7 @@ JOIN::optimize_inner()
{
DBUG_ENTER("JOIN::optimize_inner");
subq_exit_fl= false;
+ best_read= 0.0;
DEBUG_SYNC(thd, "before_join_optimize");
THD_STAGE_INFO(thd, stage_optimizing);
@@ -3588,7 +3591,7 @@ bool JOIN::make_aggr_tables_info()
TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param,
all_fields,
NULL, distinct,
- TRUE, select_options, HA_POS_ERROR,
+ TRUE, select_options, HA_ROWS_MAX,
&empty_clex_str, !need_tmp,
keep_row_order);
if (!table)
@@ -4233,7 +4236,7 @@ bool
JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
{
tab->filesort=
- new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->keep_current_rowid,
+ new (thd->mem_root) Filesort(order, HA_ROWS_MAX, tab->keep_current_rowid,
tab->select);
if (!tab->filesort)
return true;
@@ -5270,7 +5273,6 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
DYNAMIC_ARRAY *keyuse_array)
{
int error= 0;
- TABLE *UNINIT_VAR(table); /* inited in all loops */
uint i,table_count,const_count,key;
uint sort_space;
table_map found_const_table_map, all_table_map;
@@ -5331,8 +5333,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (s= stat, i= 0; (tables= ti++); s++, i++)
{
TABLE_LIST *embedding= tables->embedding;
+ TABLE *table= tables->table;
stat_vector[i]=s;
- table_vector[i]=s->table=table=tables->table;
+ table_vector[i]= s->table= table;
s->tab_list= tables;
table->pos_in_table_list= tables;
error= tables->fetch_number_of_rows();
@@ -5465,7 +5468,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (s= stat ; s < stat_end ; s++)
{
- table= s->table;
+ TABLE *table= s->table;
for (JOIN_TAB *t= stat ; t < stat_end ; t++)
{
if (t->dependent & table->map)
@@ -5569,7 +5572,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
{
- table=s->table;
+ TABLE *table= s->table;
if (table->is_filled_at_execution())
continue;
@@ -5622,7 +5625,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
(*s->on_expr_ref)->is_expensive()))
{ // system table
int tmp= 0;
- s->type=JT_SYSTEM;
+ s->type= JT_SYSTEM;
join->const_table_map|=table->map;
set_position(join,const_count++,s,(KEYUSE*) 0);
if ((tmp= join_read_const_table(join->thd, s,
@@ -5825,19 +5828,20 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->startup_cost= 0;
if (s->type == JT_SYSTEM || s->type == JT_CONST)
{
-
Json_writer_object table_records(thd);
- /* Only one matching row */
- s->found_records= s->records= 1;
- s->records_out= 1.0;
+ ha_rows records= 1;
+ if (s->type == JT_SYSTEM || s->table->file->stats.records == 0)
+ records= s->table->file->stats.records;
+ /* zero or one matching row */
+ s->records= s->found_records= records;
+ s->records_init= s->records_out= rows2double(records);
s->read_time=1.0;
s->worst_seeks=1.0;
- table_records.add_table_name(s)
- .add("rows", s->found_records)
- .add("cost", s->read_time)
- .add("table_type", s->type == JT_CONST ?
- "const" :
- "system");
+ table_records.add_table_name(s).
+ add("rows", s->found_records).
+ add("cost", s->read_time).
+ add("table_type", s->type == JT_CONST ?
+ "const" : "system");
continue;
}
/*
@@ -5889,7 +5893,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->table->pos_in_table_list->is_materialized_derived())) // (3)
{
bool impossible_range= FALSE;
- ha_rows records= HA_POS_ERROR;
+ ha_rows records= HA_ROWS_MAX;
SQL_SELECT *select= 0;
Item **sargable_cond= NULL;
if (!s->const_keys.is_clear_all())
@@ -5956,6 +5960,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
}
else
{
+ double records= 1;
join->const_table_map|= s->table->map;
set_position(join,const_count++,s,(KEYUSE*) 0);
s->type= JT_CONST;
@@ -5966,7 +5971,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->info= ET_IMPOSSIBLE_ON_CONDITION;
found_const_table_map|= s->table->map;
mark_as_null_row(s->table); // All fields are NULL
+ records= 0;
}
+ s->records_init= s->records_out= records;
+ s->found_records= s->records= (ha_rows)records;
}
}
if (records != HA_POS_ERROR)
@@ -6055,7 +6063,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (i= 0; i < join->table_count ; i++)
if (double rr= join->best_positions[i].records_read)
records= COST_MULT(records, rr);
- rows= records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records;
+ rows= double_to_rows(records);
set_if_smaller(rows, unit->lim.get_select_limit());
join->select_lex->increase_derived_records(rows);
}
@@ -7697,8 +7705,9 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
{
join->positions[idx].table= table;
join->positions[idx].key=key;
- join->positions[idx].records_read=1.0; /* This is a const table */
- join->positions[idx].records_out=1.0; /* This is a const table */
+ join->positions[idx].records_read=1.0; /* This is a const table */
+ join->positions[idx].records_out=1.0; /* This is a const table */
+ join->positions[idx].records_init=1.0; /* This is a const table */
join->positions[idx].cond_selectivity= 1.0;
join->positions[idx].ref_depend_map= 0;
@@ -7751,7 +7760,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
TODO:
Extend with_found_constraint' to be set for a top level expression of type
X=Y where X and Y has fields from current table and at least one field from
- one o more previous tables.
+ one or more previous tables.
@see also
table_after_join_selectivity() produces selectivity of condition that is
@@ -7851,37 +7860,29 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
DBUG_ENTER("cost_for_index_read");
rows_adjusted= MY_MIN(rows2double(records), (double) thd->variables.max_seeks_for_key);
+ set_if_bigger(rows_adjusted, 1);
+
#ifdef OLD_CODE_LIMITED_SEEKS
set_if_smaller(rows_adjusted, worst_seeks);
#endif
if (file->is_clustering_key(key))
{
- cost.index_only_cost= file->ha_read_time(key, 1, (ha_rows)rows_adjusted);
- /*
- Same computation as in ha_read_and_copy_time()
- We do it explicitely here as we want to use the original value of
- records to compute the record copy cost.
- */
- cost.read_cost= (cost.index_only_cost +
- rows2double(records) * ROW_COPY_COST_THD(thd));
+ cost.index_only_cost=
+ file->ha_keyread_clustered_and_copy_time(key, 1, rows_adjusted, 0);
+ /* There is no 'index_only_read' with a clustered index */
+ cost.read_cost= cost.index_only_cost;
}
else if (table->covering_keys.is_set(key) && !table->no_keyread)
{
- cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows)rows_adjusted);
+ cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0);
/* Same computation as in ha_keyread_and_copy_time() */
cost.read_cost= (cost.index_only_cost +
- rows2double(records) * KEY_COPY_COST_THD(thd));
+ rows2double(records) * file->KEY_COPY_COST);
}
else
{
- cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows) rows_adjusted);
- /*
- Note that ha_read_time() + ..ROW_COPY_COST should be same
- as ha_rnd_pos_time().
- */
- cost.read_cost= (cost.index_only_cost +
- file->ha_read_time(key, 0, (ha_rows)rows_adjusted) +
- rows2double(records) * ROW_COPY_COST_THD(thd));
+ cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0);
+ cost.read_cost= (cost.index_only_cost + file->ha_rnd_pos_time(records));
}
DBUG_PRINT("statistics", ("index_cost: %.3f full_cost: %.3f",
cost.index_only_cost, cost.read_cost));
@@ -7950,8 +7951,8 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
read even if selectivity (and thus new_records) would be very low.
*/
new_cost= (MY_MAX(cost_of_accepted_rows,
- ranges * KEY_LOOKUP_COST * io_cost *
- table->file->optimizer_cache_cost) +
+ ranges * table->file->KEY_LOOKUP_COST +
+ ranges * io_cost * table->file->DISK_READ_RATIO) +
cost_of_rejected_rows + filter_lookup_cost);
new_total_cost= ((new_cost + new_records * WHERE_COST_THD(thd)) *
prev_records + filter_startup_cost);
@@ -8015,6 +8016,24 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
None
*/
+struct best_plan
+{
+ double cost; // Smallest cost found
+ double records; // Old 'Records'
+ double records_read; // Records accessed
+ double records_after_filter; // Records_read + filter
+ double records_out; // Smallest record count seen
+ Range_rowid_filter_cost_info *filter; // Best filter
+ KEYUSE *key; // Best key
+ SplM_plan_info *spl_plan;
+ table_map ref_depends_map;
+ enum join_type type;
+ uint forced_index;
+ uint max_key_part;
+ bool uses_jbuf;
+};
+
+
void
best_access_path(JOIN *join,
JOIN_TAB *s,
@@ -8030,14 +8049,7 @@ best_access_path(JOIN *join,
uint use_cond_selectivity=
thd->variables.optimizer_use_condition_selectivity;
TABLE *table= s->table;
- KEYUSE *best_key= 0;
- uint best_max_key_part= 0;
- uint best_forced_index= MAX_KEY, forced_index= MAX_KEY;
my_bool found_constraint= 0;
- double best_cost= DBL_MAX;
- double records= DBL_MAX;
- double records_out= table->stat_records() * table->cond_selectivity;
- table_map best_ref_depends_map= 0;
/*
key_dependent is 0 if all key parts could be used or if there was an
EQ_REF table found (which uses all key parts). In other words, we cannot
@@ -8045,18 +8057,29 @@ best_access_path(JOIN *join,
Otherwise it's a bitmap of tables that could improve key usage.
*/
table_map key_dependent= 0;
- Range_rowid_filter_cost_info *best_filter= 0;
double tmp;
ha_rows rec;
- bool best_uses_jbuf= FALSE;
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
KEYUSE *hj_start_key= 0;
- SplM_plan_info *spl_plan= 0;
- enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN;
Loose_scan_opt loose_scan_opt;
+ struct best_plan best;
Json_writer_object trace_wrapper(thd, "best_access_path");
DBUG_ENTER("best_access_path");
+ best.cost= DBL_MAX;
+ best.records= DBL_MAX;
+ best.records_read= DBL_MAX;
+ best.records_after_filter= DBL_MAX;
+ best.records_out= table->stat_records() * table->cond_selectivity;
+ best.filter= 0;
+ best.key= 0;
+ best.max_key_part= 0;
+ best.type= JT_UNKNOWN;
+ best.forced_index= MAX_KEY;
+ best.ref_depends_map= 0;
+ best.uses_jbuf= FALSE;
+ best.spl_plan= 0;
+
disable_jbuf= disable_jbuf || idx == join->const_tables;
trace_wrapper.add_table_name(s);
@@ -8066,7 +8089,7 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ best.spl_plan= s->choose_best_splitting(record_count, remaining_tables);
if (unlikely(thd->trace_started()))
{
@@ -8077,10 +8100,10 @@ best_access_path(JOIN *join,
if (s->keyuse)
{ /* Use key if possible */
- KEYUSE *keyuse;
- KEYUSE *start_key=0;
- double best_records= DBL_MAX, index_only_cost= DBL_MAX;
+ KEYUSE *keyuse, *start_key= 0;
+ double index_only_cost= DBL_MAX;
uint max_key_part=0;
+ enum join_type type= JT_UNKNOWN;
/* Test how we can use keys */
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
@@ -8102,7 +8125,7 @@ best_access_path(JOIN *join,
key_part_map ref_or_null_part= 0;
key_part_map all_parts= 0;
double startup_cost= s->startup_cost;
- double records_after_filter;
+ double records_after_filter, records_best_filter, records;
Range_rowid_filter_cost_info *filter= 0;
if (is_hash_join_key_no(key))
@@ -8333,7 +8356,6 @@ best_access_path(JOIN *join,
((double) (table->s->max_key_length-keyinfo->key_length) /
(double) table->s->max_key_length)));
set_if_smaller(records, (double)s->records);
- set_if_smaller(records_out, records);
if (records < 2.0)
records=2.0; /* Can't be as good as a unique */
}
@@ -8400,6 +8422,8 @@ best_access_path(JOIN *join,
(!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
{
+ double extra_cost= 0;
+
max_key_part= max_part_bit(found_part);
/*
ReuseRangeEstimateForRef-3:
@@ -8524,7 +8548,7 @@ best_access_path(JOIN *join,
a*keyinfo->user_defined_key_parts - rec_per_key)/
(keyinfo->user_defined_key_parts-1);
else
- records= a;
+ records= rows2double(s->records);
set_if_bigger(records, MIN_ROWS_AFTER_FILTERING);
}
}
@@ -8533,6 +8557,7 @@ best_access_path(JOIN *join,
{
/* We need to do two key searches to find row */
records *= 2.0;
+ extra_cost= s->table->file->KEY_LOOKUP_COST;
}
/*
@@ -8562,13 +8587,14 @@ best_access_path(JOIN *join,
}
/* Limit the number of matched rows */
+ set_if_smaller(records, (double) s->records);
tmp= records;
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
INDEX_READ_COST cost= cost_for_index_read(thd, table, key,
(ha_rows) tmp,
(ha_rows) s->worst_seeks);
tmp= cost.read_cost;
- index_only_cost= cost.index_only_cost;
+ index_only_cost= cost.index_only_cost+extra_cost;
}
else
{
@@ -8590,7 +8616,7 @@ best_access_path(JOIN *join,
if (records == DBL_MAX) // Key not usable
continue;
- records_after_filter= records;
+ records_best_filter= records_after_filter= records;
/*
Check that start_key->key can be used for index access
@@ -8604,7 +8630,8 @@ best_access_path(JOIN *join,
tmp,
index_only_cost,
record_count,
- &records_out);
+ &records_best_filter);
+ set_if_smaller(best.records_out, records_best_filter);
if (filter)
filter= filter->apply_filter(thd, table, &tmp, &records_after_filter,
&startup_cost,
@@ -8625,20 +8652,31 @@ best_access_path(JOIN *join,
The COST_EPS is here to ensure we use the first key if there are
two 'identical keys' that could be used.
*/
- if (tmp + COST_EPS < best_cost)
+ if (tmp + COST_EPS < best.cost)
{
trace_access_idx.add("chosen", true);
- best_cost= tmp;
+ best.cost= tmp;
/*
We use 'records' instead of 'records_after_filter' here as we want
to have EXPLAIN print the number of rows found by the key access.
*/
- best_records= records; // Records before filter!
- best_key= start_key;
- best_max_key_part= max_key_part;
- best_ref_depends_map= found_ref;
- best_filter= filter;
- best_type= type;
+ best.records= records; // Records before filter!
+ best.records_read= records;
+ /*
+ If we are using 'use_cond_selectivity > 1' then
+ table_after_join_selectivity() may take into account other
+        filters than what is currently used, so we have to use
+        records_after_filter. If 'use_cond_selectivity' <= 1 then we
+ can use information from the best filter.
+ */
+ best.records_after_filter= ((use_cond_selectivity > 1) ?
+ records_after_filter :
+ records_best_filter);
+ best.key= start_key;
+ best.max_key_part= max_key_part;
+ best.ref_depends_map= found_ref;
+ best.filter= filter;
+ best.type= type;
}
else if (unlikely(thd->trace_started()))
{
@@ -8646,9 +8684,8 @@ best_access_path(JOIN *join,
add("chosen", false).
add("cause", cause ? cause : "cost");
}
- set_if_smaller(records_out, records);
+ set_if_smaller(best.records_out, records);
} /* for each key */
- records= best_records;
}
else
{
@@ -8671,7 +8708,7 @@ best_access_path(JOIN *join,
/* Add dependency for sub queries */
key_dependent|= s->embedded_dependent;
- } /* if (s->keyuse) */
+ } /* if (s->keyuse) */
/* Check that s->key_dependent contains all used_tables found in s->keyuse */
@@ -8687,7 +8724,7 @@ best_access_path(JOIN *join,
(1) s is inner table of semi-join -> join cache is allowed for semijoins
(2) s is inner table of outer join -> join cache is allowed for outer joins
*/
- if (idx > join->const_tables && best_key == 0 &&
+ if (idx > join->const_tables && best.key == 0 &&
(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
join->max_allowed_join_cache_level > 2 &&
!bitmap_is_clear_all(eq_join_set) && !disable_jbuf &&
@@ -8696,11 +8733,11 @@ best_access_path(JOIN *join,
(!(table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2)
{
- double refills, cmp_time;
+ double refills, row_copy_cost, cmp_time;
/* Estimate the cost of the hash join access to the table */
- double rnd_records= matching_candidates_in_table(s, found_constraint,
+ double rnd_records= matching_candidates_in_table(s, 0,
use_cond_selectivity);
- set_if_smaller(records_out, rnd_records);
+ set_if_smaller(best.records_out, rnd_records);
/*
The following cost calculation is identical to the cost calculation for
@@ -8729,18 +8766,22 @@ best_access_path(JOIN *join,
We assume here that, thanks to the hash, we don't have to compare all
row combinations, only a HASH_FANOUT (10%) rows in the cache.
*/
- cmp_time= (rnd_records * record_count * HASH_FANOUT *
- (ROW_COPY_COST_THD(thd) * JOIN_CACHE_ROW_COPY_COST_FACTOR +
+ row_copy_cost= (ROW_COPY_COST_THD(thd) * 2 *
+ JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
+ cmp_time= (record_count * row_copy_cost +
+ rnd_records * record_count * HASH_FANOUT *
+ ((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
tmp= COST_ADD(tmp, cmp_time);
- best_cost= tmp;
- records= rnd_records;
- best_key= hj_start_key;
- best_ref_depends_map= 0;
- best_uses_jbuf= TRUE;
- best_filter= 0;
- best_type= JT_HASH;
+ best.cost= tmp;
+ best.records_read= best.records_after_filter= rows2double(s->records);
+ best.records= rnd_records;
+ best.key= hj_start_key;
+ best.ref_depends_map= 0;
+ best.uses_jbuf= TRUE;
+ best.filter= 0;
+ best.type= JT_HASH;
Json_writer_object trace_access_hash(thd);
if (unlikely(trace_access_hash.trace_started()))
trace_access_hash.
@@ -8748,7 +8789,7 @@ best_access_path(JOIN *join,
add("index", "hj-key").
add("rows", rnd_records).
add("refills", refills).
- add("cost", best_cost).
+ add("cost", best.cost).
add("chosen", true);
}
@@ -8788,21 +8829,25 @@ best_access_path(JOIN *join,
be used for cases with small datasets, which is annoying.
*/
Json_writer_object trace_access_scan(thd);
- if ((records >= s->found_records || best_cost > s->read_time) && // (1)
- !(best_key && best_key->key == MAX_KEY) && // (2)
+ if ((best.records_read >= s->found_records ||
+ best.cost > s->read_time) && // (1)
+ !(best.key && best.key->key == MAX_KEY) && // (2)
!(s->quick &&
s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2)
- best_key && s->quick->index == best_key->key && // (2)
- best_max_key_part >= table->opt_range[best_key->key].key_parts) &&// (2)
- !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
- !table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3)
- !(table->force_index_join && best_key && !s->quick) && // (4)
- !(best_key && table->pos_in_table_list->jtbm_subselect)) // (5)
+ best.key && s->quick->index == best.key->key && // (2)
+ best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2)
+ !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
+ !table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3)
+ !(table->force_index_join && best.key && !s->quick) && // (4)
+ !(best.key && table->pos_in_table_list->jtbm_subselect)) // (5)
{ // Check full join
- double rnd_records, records_after_filter, org_records;
+ double records_after_filter, org_records;
+ double records_best_filter;
Range_rowid_filter_cost_info *filter= 0;
double startup_cost= s->startup_cost;
const char *scan_type= "";
+ enum join_type type;
+ uint forced_index= MAX_KEY;
/*
Range optimizer never proposes a RANGE if it isn't better
@@ -8832,7 +8877,8 @@ best_access_path(JOIN *join,
This is done to make records found comparable to what we get with
'ref' access.
*/
- org_records= records_after_filter= rnd_records= rows2double(s->found_records);
+ org_records= records_after_filter= rows2double(s->found_records);
+ records_best_filter= org_records;
if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
{
@@ -8850,11 +8896,13 @@ best_access_path(JOIN *join,
range->cost / s->quick->read_time >= 0.9999999));
filter=
- table->best_range_rowid_filter_for_partial_join(key_no, rows2double(range->rows),
+ table->best_range_rowid_filter_for_partial_join(key_no,
+ rows2double(range->rows),
range->find_cost,
range->index_only_cost,
record_count,
- &records_out);
+ &records_best_filter);
+ set_if_smaller(best.records_out, records_best_filter);
if (filter)
{
double filter_cost= range->fetch_cost;
@@ -8883,20 +8931,18 @@ best_access_path(JOIN *join,
{
type= JT_INDEX_MERGE;
}
- set_if_smaller(records_out, records_after_filter);
loose_scan_opt.check_range_access(join, idx, s->quick);
}
else
{
/* We will now calculate cost of scan, with or without join buffer */
- rnd_records= matching_candidates_in_table(s, found_constraint,
- use_cond_selectivity);
- records_after_filter= rnd_records;
- set_if_smaller(records_out, rnd_records);
+ records_after_filter= matching_candidates_in_table(s, 0,
+ use_cond_selectivity);
+ DBUG_ASSERT(records_after_filter <= s->records);
- org_records= rows2double(s->records);
+ set_if_smaller(best.records_out, records_after_filter);
- DBUG_ASSERT(rnd_records <= s->records);
+ org_records= rows2double(s->records);
/* Estimate cost of reading table. */
if (s->cached_forced_index_type)
@@ -8907,7 +8953,7 @@ best_access_path(JOIN *join,
}
else
{
- if (table->force_index_join && !best_key)
+ if (table->force_index_join && !best.key)
{
/*
The query is using 'forced_index' and we did not find a usable key.
@@ -8951,6 +8997,7 @@ best_access_path(JOIN *join,
tmp= s->cached_scan_and_compare_time;
type= JT_ALL;
}
+ /* Cache result for other calls */
s->cached_forced_index_type= type;
s->cached_forced_index_cost= tmp;
s->cached_forced_index= forced_index;
@@ -8977,7 +9024,7 @@ best_access_path(JOIN *join,
else
{
/* Scan through join cache */
- double cmp_time, refills;
+ double cmp_time, row_copy_cost, refills;
/*
Calculate cost of checking the WHERE for this table.
@@ -8995,13 +9042,16 @@ best_access_path(JOIN *join,
/* We come here only if there are already rows in the join cache */
DBUG_ASSERT(idx != join->const_tables);
/*
- Cost of moving each row from each previous table from the join cache
- to it's table record and comparing it with the found and accepted
- row.
+ Cost of:
+ - Copying all previous record combinations to the join cache
+ - Copying the tables from the join cache to table records
+ - Checking the WHERE against the final row combination
*/
- cmp_time= (rnd_records * record_count *
- (ROW_COPY_COST_THD(thd) * (idx - join->const_tables) *
- JOIN_CACHE_ROW_COPY_COST_FACTOR +
+ row_copy_cost= (ROW_COPY_COST_THD(thd) *
+ JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
+ cmp_time= (record_count * row_copy_cost +
+ records_after_filter * record_count *
+ ((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
tmp= COST_ADD(tmp, cmp_time);
}
@@ -9017,10 +9067,10 @@ best_access_path(JOIN *join,
trace_access_scan.
add("access_type",
type == JT_ALL ? scan_type : join_type_str[type]).
- add("rows", org_records).
- add("rows_after_scan", rnd_records).
- add("rows_after_filter", records_after_filter).
- add("cost", tmp);
+ add("rows", org_records).
+ add("rows_after_filter", records_after_filter).
+ add("rows_out", best.records_out).
+ add("cost", tmp);
if (type == JT_ALL)
{
trace_access_scan.add("index_only",
@@ -9028,27 +9078,38 @@ best_access_path(JOIN *join,
}
}
- if (tmp + COST_EPS < best_cost)
+ if (tmp + COST_EPS < best.cost)
{
/*
If the table has a range (s->quick is set) make_join_select()
will ensure that this will be used
*/
- best_cost= tmp;
- records= rnd_records;
- best_key= 0;
- best_forced_index= forced_index;
+ best.cost= tmp;
+ best.records_read= org_records; // Records accessed
+ best.records= records_after_filter; // Records to be checked with WHERE
+ /*
+ If we are using 'use_cond_selectivity > 1' then
+ table_after_join_selectivity may take into account other
+        filters than what is currently used, so we have to use
+        records_after_filter. If 'use_cond_selectivity' <= 1 then we
+ can use information from the best filter.
+ */
+ best.records_after_filter= ((use_cond_selectivity > 1) ?
+ records_after_filter :
+ records_best_filter);
+ best.key= 0;
+ best.forced_index= forced_index;
/*
filter is only set if
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE
*/
- best_filter= filter;
+ best.filter= filter;
/* range/index_merge/ALL/index access method are "independent", so: */
- best_ref_depends_map= 0;
- best_uses_jbuf= MY_TEST(!disable_jbuf && !((table->map &
+ best.ref_depends_map= 0;
+ best.uses_jbuf= MY_TEST(!disable_jbuf && !((table->map &
join->outer_join)));
- spl_plan= 0;
- best_type= type;
+ best.spl_plan= 0;
+ best.type= type;
trace_access_scan.add("chosen", true);
}
else
@@ -9063,29 +9124,33 @@ best_access_path(JOIN *join,
add("cause", "cost");
}
+ crash_if_first_double_is_bigger(best.records_out, best.records);
+ crash_if_first_double_is_bigger(best.records_out, best.records_read);
+
/* Update the cost information for the current partial plan */
- crash_if_first_double_is_bigger(records_out, records);
- pos->records_read= records;
- pos->records_out= records_out;
- pos->read_time= best_cost;
- pos->key= best_key;
- pos->forced_index= best_forced_index;
- pos->type= best_type;
+ pos->records_init= best.records_read;
+ pos->records_after_filter= best.records_after_filter;
+ pos->records_read= best.records;
+ pos->records_out= best.records_out;
+ pos->read_time= best.cost;
+ pos->key= best.key;
+ pos->forced_index= best.forced_index;
+ pos->type= best.type;
pos->table= s;
- pos->ref_depend_map= best_ref_depends_map;
+ pos->ref_depend_map= best.ref_depends_map;
pos->loosescan_picker.loosescan_key= MAX_KEY;
- pos->use_join_buffer= best_uses_jbuf;
- pos->spl_plan= spl_plan;
- pos->range_rowid_filter_info= best_filter;
- pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 :
+ pos->use_join_buffer= best.uses_jbuf;
+ pos->spl_plan= best.spl_plan;
+ pos->range_rowid_filter_info= best.filter;
+ pos->key_dependent= (best.type == JT_EQ_REF ? (table_map) 0 :
key_dependent & remaining_tables);
loose_scan_opt.save_to_position(s, loose_scan_pos);
- if (!best_key &&
- idx == join->const_tables &&
+ if (!best.key &&
+ idx == join->const_tables && // First table
table == join->sort_by_table &&
- join->unit->lim.get_select_limit() >= records)
+ join->unit->lim.get_select_limit() >= best.records) // QQQ Why?
{
trace_access_scan.add("use_tmp_table", true);
join->sort_by_table= (TABLE*) 1; // Must use temporary table
@@ -9320,15 +9385,6 @@ choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest)
DBUG_RETURN(TRUE);
}
- /*
- Store the cost of this query into a user variable
- Don't update last_query_cost for statements that are not "flat joins" :
- i.e. they have subqueries, unions or call stored procedures.
- TODO: calculate a correct cost for a query with subqueries and UNIONs.
- */
- if (join->thd->lex->is_single_level_stmt())
- join->thd->status_var.last_query_cost= join->best_read;
-
join->emb_sjm_nest= 0;
DBUG_RETURN(FALSE);
}
@@ -9595,6 +9651,8 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
{
POSITION *position= join->positions + idx;
Json_writer_object trace_one_table(thd);
+ double original_record_count, current_record_count;
+
if (unlikely(thd->trace_started()))
trace_plan_prefix(join, idx, remaining_tables);
/* Find the best access method from 's' to the current partial plan */
@@ -9603,22 +9661,71 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
position, &loose_scan_pos);
/* Compute the cost of the new plan extended with 's' */
- record_count= COST_MULT(record_count, position->records_read);
+ current_record_count= COST_MULT(record_count, position->records_out);
read_time= COST_ADD(read_time, position->read_time);
- optimize_semi_joins(join, remaining_tables, idx, &record_count, &read_time,
- &loose_scan_pos);
+ original_record_count= current_record_count;
+ optimize_semi_joins(join, remaining_tables, idx, &current_record_count,
+ &read_time, &loose_scan_pos);
+ if (position->sj_strategy != SJ_OPT_NONE && original_record_count)
+ {
+ /* Adjust records_out to contain the final number of rows */
+ double ratio= current_record_count / original_record_count;
+ /* QQQ This is just to stop an assert later */
+ if (ratio < 1)
+ position->records_out*= ratio;
+ }
+
remaining_tables&= ~(s->table->map);
- double pushdown_cond_selectivity= 1.0;
- if (use_cond_selectivity > 1)
+ if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
+ {
+ double pushdown_cond_selectivity, records_out;
pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s,
- remaining_tables);
- position->cond_selectivity= pushdown_cond_selectivity;
+ remaining_tables,
+ &records_out);
+ if (unlikely(thd->trace_started()) &&
+ pushdown_cond_selectivity != 1.0)
+ {
+ trace_one_table.
+ add("pushdown_cond_selectivity", pushdown_cond_selectivity).
+ add("rows_out", records_out);
+ }
+ position->cond_selectivity= pushdown_cond_selectivity;
+ position->records_out= records_out;
+ current_record_count= COST_MULT(record_count, records_out);
+ }
+ else
+ position->cond_selectivity= 1.0;
++idx;
+ record_count= current_record_count;
}
if (join->sort_by_table &&
join->sort_by_table != join->positions[join->const_tables].table->table)
- read_time+= record_count; // We have to make a temp table
+ {
+ /*
+      We may have to make a temp table. Note that this is only a
+      heuristic, since we cannot know for sure at this point if we
+      are going to use addon fields or will have to flush the sort to
+      disk. We also don't know whether the temporary table will be
+      in memory or on disk.
+      The following calculation takes a middle ground where we assume
+      we can sort the keys in memory but have to use a disk based
+      temporary table to retrieve the rows.
+ This cost is probably much bigger than it has to be...
+ */
+ double sort_cost;
+ sort_cost= (get_qsort_sort_cost((ha_rows)record_count, 0) +
+ record_count *
+ DISK_TEMPTABLE_LOOKUP_COST(thd));
+ {
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object trace_one_table(thd);
+ trace_one_table.add("estimated_cost_for_sorting", sort_cost);
+ }
+ }
+ read_time= COST_ADD(read_time, sort_cost);
+ }
memcpy((uchar*) join->best_positions, (uchar*) join->positions,
sizeof(POSITION)*idx);
join->join_record_count= record_count;
@@ -9997,8 +10104,7 @@ double JOIN::get_examined_rows()
COST_MULT((double) (tab->get_examined_rows()), prev_fanout));
prev_tab= tab;
}
- examined_rows= (double)
- (records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records);
+ examined_rows= double_to_rows(records);
return examined_rows;
}
@@ -10129,9 +10235,10 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
@brief
Get the selectivity of conditions when joining a table
- @param join The optimized join
- @param s The table to be joined for evaluation
- @param rem_tables The bitmap of tables to be joined later
+ @param join The optimized join
+ @param s The table to be joined for evaluation
+ @param rem_tables The bitmap of tables to be joined later
+ @param new_records_out OUT Set to number of rows accepted
@detail
Get selectivity of conditions that can be applied when joining this table
@@ -10145,12 +10252,14 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
condition, "COND(this_table) AND COND(this_table, previous_tables)".
@retval
- selectivity of the conditions imposed on the rows of s
+    selectivity of the conditions imposed on the rows of s relative to
+    the rows that we are expected to read (position->records_init).
*/
static
double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
- table_map rem_tables)
+ table_map rem_tables,
+ double *new_records_out)
{
uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
uint ref_keyuse_size= MAX_REF_PARTS;
@@ -10158,13 +10267,14 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
Field *field;
TABLE *table= s->table;
MY_BITMAP *read_set= table->read_set;
- double sel= table->cond_selectivity;
POSITION *pos= &join->positions[idx];
+ double sel, records_out= pos->records_out;
uint keyparts= 0;
uint found_part_ref_or_null= 0;
if (pos->key != 0)
{
+ sel= table->cond_selectivity;
/*
A ref access or hash join is used for this table. ref access is created
from
@@ -10338,35 +10448,22 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
}
keyuse++;
}
- }
- else
- {
/*
- The table is accessed with full table scan, or quick select.
- Selectivity of COND(table) is already accounted for in
- matching_candidates_in_table().
- */
- sel= 1.0;
- }
+      If the field f from the table is equal to a field from one of the
+      earlier joined tables then the selectivity of the range conditions
+      over the field f must be discounted.
- /*
- If the field f from the table is equal to a field from one the
- earlier joined tables then the selectivity of the range conditions
- over the field f must be discounted.
-
- We need to discount selectivity only if we're using ref-based
- access method (and have sel!=1).
- If we use ALL/range/index_merge, then sel==1, and no need to discount.
- */
- if (pos->key != NULL)
- {
+ We need to discount selectivity only if we're using ref-based
+ access method (and have sel!=1).
+ If we use ALL/range/index_merge, then sel==1, and no need to discount.
+ */
for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
{
if (!bitmap_is_set(read_set, field->field_index) ||
!field->next_equal_field)
- continue;
- for (Field *next_field= field->next_equal_field;
- next_field != field;
+ continue;
+ for (Field *next_field= field->next_equal_field;
+ next_field != field;
next_field= next_field->next_equal_field)
{
if (!(next_field->table->map & rem_tables) &&
@@ -10381,14 +10478,39 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
}
}
}
+ /*
+ We have now calculated a more exact 'records_out' taking more index
+ costs into account.
+ pos->records_out previously contained the smallest record count for
+ all range or ref access, which should not be smaller than what we
+ calculated above.
+ */
+ records_out= pos->records_after_filter * sel;
+ set_if_smaller(records_out, pos->records_out);
}
- sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
+ sel= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
keyparts, ref_keyuse_steps);
+ records_out*= sel;
+
+ /*
+    Update sel to be relative to pos->records_read as that is what some old
+    code expects. Newer code should just use 'position->records_out' instead.
+ */
+ if (pos->records_read == 0)
+ sel= 1.0;
+ else
+ {
+ sel= records_out / pos->records_read;
+ DBUG_ASSERT(sel >= 0.0 and sel <= 1.00001);
+ if (sel > 1.0)
+ sel= 1.0;
+ }
+
exit:
+ *new_records_out= records_out;
if (ref_keyuse_steps != ref_keyuse_steps_buf)
my_free(ref_keyuse_steps);
- DBUG_ASSERT(sel >= 0.0 and sel <= 1.0);
return sel;
}
@@ -10407,7 +10529,7 @@ check_if_edge_table(POSITION *pos,
if ((pos->type == JT_EQ_REF ||
(pos->type == JT_REF &&
- pos->records_read == 1 &&
+ pos->records_init == 1 &&
!pos->range_rowid_filter_info)) &&
pushdown_cond_selectivity >= 0.999)
return SEARCH_FOUND_EDGE;
@@ -10600,7 +10722,7 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
// pplan_cost already too great, stop search
continue;
- pplan= expand pplan by best_access_method;
+ pplan= expand plan by best_access_method;
remaining_tables= remaining_tables - table T;
if (remaining_tables is not an empty set
and
@@ -10671,8 +10793,8 @@ best_extension_by_limited_search(JOIN *join,
{
THD *thd= join->thd;
/*
- 'join' is a partial plan with lower cost than the best plan so far,
- so continue expanding it further with the tables in 'remaining_tables'.
+ 'join' is a partial plan with lower cost than the best plan so far,
+ so continue expanding it further with the tables in 'remaining_tables'.
*/
JOIN_TAB *s;
double best_record_count= DBL_MAX;
@@ -10689,14 +10811,14 @@ best_extension_by_limited_search(JOIN *join,
if (dbug_user_var_equals_int(thd,
"show_explain_probe_select_id",
join->select_lex->select_number))
- dbug_serve_apcs(thd, 1);
- );
+ dbug_serve_apcs(thd, 1);
+ );
if (unlikely(thd->check_killed())) // Abort
DBUG_RETURN(SEARCH_ABORT);
DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
- "part_plan"););
+ "part_plan"););
status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls);
if (join->emb_sjm_nest)
@@ -10785,7 +10907,7 @@ best_extension_by_limited_search(JOIN *join,
!check_interleaving_with_nj(s))
{
table_map real_table_bit= s->table->map;
- double current_record_count, current_read_time;
+ double current_record_count, current_read_time, original_record_count;
double partial_join_cardinality;
POSITION *position= join->positions + idx, *loose_scan_pos;
double pushdown_cond_selectivity;
@@ -10802,7 +10924,7 @@ best_extension_by_limited_search(JOIN *join,
loose_scan_pos= pos->position+1;
/* Compute the cost of the new plan extended with 's' */
- current_record_count= COST_MULT(record_count, position->records_read);
+ current_record_count= COST_MULT(record_count, position->records_out);
current_read_time= COST_ADD(read_time, position->read_time);
if (unlikely(trace_one_table.trace_started()))
@@ -10811,9 +10933,22 @@ best_extension_by_limited_search(JOIN *join,
add("rows_for_plan", current_record_count).
add("cost_for_plan", current_read_time);
}
+ original_record_count= current_record_count;
optimize_semi_joins(join, remaining_tables, idx, &current_record_count,
&current_read_time, loose_scan_pos);
-
+ if (position->sj_strategy != SJ_OPT_NONE)
+ {
+ /* Adjust records_out and current_record_count after semi join */
+ double ratio= current_record_count / original_record_count;
+ /* QQQ This is just to stop an assert later */
+ if (ratio < 1.0)
+ position->records_out*= ratio;
+ if (unlikely(trace_one_table.trace_started()))
+ {
+ trace_one_table.add("sj_rows_out", position->records_out);
+ trace_one_table.add("sj_rows_for_plan", current_record_count);
+ }
+ }
/* Expand only partial plans with lower cost than the best QEP so far */
if (current_read_time + COST_EPS >= join->best_read)
{
@@ -10864,15 +10999,15 @@ best_extension_by_limited_search(JOIN *join,
if (best_record_count > current_record_count ||
best_read_time > current_read_time ||
(idx == join->const_tables && // 's' is the first table in the QEP
- s->table == join->sort_by_table))
+ s->table == join->sort_by_table))
{
/*
Store the current record count and cost as the best
possible cost at this level if the following holds:
- It's the lowest record number and cost so far
- - There is no remaing table that could improve index usage
- or we found an EQ_REF or REF key with less than 2
- matching records (good enough).
+        - There is no remaining table that could improve index usage
+          or we found an EQ_REF or REF key with less than 2
+          matching records (good enough).
*/
if (best_record_count >= current_record_count &&
best_read_time >= current_read_time &&
@@ -10924,17 +11059,26 @@ best_extension_by_limited_search(JOIN *join,
}
pushdown_cond_selectivity= 1.0;
- if (use_cond_selectivity > 1)
+ /*
+ TODO: When a semi-join strategy is applied (sj_strategy!=SJ_OPT_NONE),
+ we should account for selectivity from table_after_join_selectivity().
+ (Condition filtering is performed before the semi-join removes some
+ fanout so this might require moving the code around)
+ */
+ if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
+ {
pushdown_cond_selectivity=
table_after_join_selectivity(join, idx, s,
- remaining_tables & ~real_table_bit);
+ remaining_tables & ~real_table_bit,
+ &position->records_out);
+ }
join->positions[idx].cond_selectivity= pushdown_cond_selectivity;
- partial_join_cardinality= (current_record_count *
- pushdown_cond_selectivity);
+ partial_join_cardinality= record_count * position->records_out;
- if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0)
+ if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0 &&
+ partial_join_cardinality < current_record_count)
trace_one_table
.add("selectivity", pushdown_cond_selectivity)
.add("estimated_join_cardinality", partial_join_cardinality);
@@ -10979,11 +11123,21 @@ best_extension_by_limited_search(JOIN *join,
{
/*
We may have to make a temp table, note that this is only a
- heuristic since we cannot know for sure at this point.
- Hence it may be wrong.
+      heuristic since we cannot know for sure at this point if we
+      are going to use addon fields or have to flush the sort to
+      disk. We also don't know if the temporary table will be in memory
+      or on disk.
+      The following calculation takes a middle ground where we assume
+      we can sort the keys in memory but have to use a disk based
+      temporary table to retrieve the rows.
+ This cost is probably much bigger than it has to be...
*/
- trace_one_table.add("cost_for_sorting", current_record_count);
- current_read_time= COST_ADD(current_read_time, current_record_count);
+ double sort_cost;
+ sort_cost= (get_qsort_sort_cost((ha_rows)current_record_count,0) +
+ current_record_count *
+ DISK_TEMPTABLE_LOOKUP_COST(thd));
+ trace_one_table.add("cost_for_sorting", sort_cost);
+ current_read_time= COST_ADD(current_read_time, sort_cost);
}
if (current_read_time < join->best_read)
{
@@ -11318,11 +11472,8 @@ prev_record_reads(const POSITION *positions, uint idx, table_map found_ref)
is an inprecise estimate and adding 1 (or, in the worst case,
#max_nested_outer_joins=64-1) will not make it any more precise.
*/
- if (pos->records_read)
- {
- found= COST_MULT(found, pos->records_read);
- found*= pos->cond_selectivity;
- }
+ if (pos->records_out)
+ found= COST_MULT(found, pos->records_out);
}
}
return found;
@@ -11752,7 +11903,7 @@ bool JOIN::get_best_combination()
*/
SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0);
- j->records_out= j->records_read;
+ j->records_init= j->records_out= j->records_read;
j->records= (ha_rows) j->records_read;
j->cond_selectivity= 1.0;
JOIN_TAB *jt;
@@ -11787,6 +11938,7 @@ bool JOIN::get_best_combination()
if (j->type == JT_SYSTEM)
goto loop_end;
+
if (!(keyuse= cur_pos->key))
{
if (cur_pos->type == JT_NEXT) // Forced index
@@ -11807,17 +11959,19 @@ bool JOIN::get_best_combination()
j->range_rowid_filter_info=
cur_pos->range_rowid_filter_info;
- loop_end:
- /*
+ /*
Save records_read in JOIN_TAB so that select_describe()/etc don't have
to access join->best_positions[].
*/
+ j->records_init= cur_pos->records_init;
j->records_read= cur_pos->records_read;
j->records_out= cur_pos->records_out;
+
+ loop_end:
j->cond_selectivity= cur_pos->cond_selectivity;
DBUG_ASSERT(j->cond_selectivity <= 1.0);
crash_if_first_double_is_bigger(j->records_out,
- j->records_read *
+ j->records_init *
(j->range_rowid_filter_info ?
j->range_rowid_filter_info->selectivity :
1.0));
@@ -12580,7 +12734,10 @@ make_outerjoin_info(JOIN *join)
{
if (embedding->is_active_sjm())
{
- /* We're trying to walk out of an SJ-Materialization nest. Don't do this. */
+ /*
+ We're trying to walk out of an SJ-Materialization nest.
+ Don't do this.
+ */
break;
}
/* Ignore sj-nests: */
@@ -12861,8 +13018,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
tab->use_quick=1;
tab->ref.key= -1;
tab->ref.key_parts=0; // Don't use ref key.
- join->best_positions[i].records_read= rows2double(tab->quick->records);
- /*
+ join->best_positions[i].records_read=
+ join->best_positions[i].records_out=
+ rows2double(tab->quick->records);
+ /*
We will use join cache here : prevent sorting of the first
table only and sort at the end.
*/
@@ -14906,14 +15065,14 @@ void JOIN_TAB::cleanup()
/**
Estimate the time to get rows of the joined table
- Updates found_records, records, cached_scan_time, cached_covering_key,
- read_time and cache_scan_and_compare_time
+ Updates found_records, records, cached_covering_key, read_time and
+ cache_scan_and_compare_time
*/
void JOIN_TAB::estimate_scan_time()
{
THD *thd= join->thd;
- double copy_cost= ROW_COPY_COST_THD(thd);
+ double copy_cost;
cached_covering_key= MAX_KEY;
if (table->is_created())
@@ -14924,6 +15083,7 @@ void JOIN_TAB::estimate_scan_time()
&startup_cost);
table->opt_range_condition_rows= records;
table->used_stat_records= records;
+ copy_cost= table->file->ROW_COPY_COST;
}
else
{
@@ -14937,21 +15097,38 @@ void JOIN_TAB::estimate_scan_time()
if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
{
cached_covering_key= find_shortest_key(table, &table->covering_keys);
- read_time= table->file->ha_key_scan_time(cached_covering_key);
- copy_cost= KEY_COPY_COST_THD(thd);
+ read_time= table->file->ha_key_scan_time(cached_covering_key, records);
+ copy_cost= 0; // included in ha_key_scan_time
}
else
- read_time= table->file->ha_scan_time();
+ {
+ read_time= table->file->ha_scan_time(records);
+ copy_cost= 0;
+ }
}
}
else
{
+    /*
+      The following is the same as calling
+      TABLE_SHARE::update_optimizer_costs, but without locks
+    */
+ if (table->s->db_type() == heap_hton)
+ memcpy(&table->s->optimizer_costs, &heap_optimizer_costs,
+ sizeof(heap_optimizer_costs));
+ else
+ memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs,
+ sizeof(tmp_table_optimizer_costs));
+ table->file->set_optimizer_costs(thd);
+ table->s->optimizer_costs_inited=1 ;
+
records= table->stat_records();
DBUG_ASSERT(table->opt_range_condition_rows == records);
- read_time= records ? (double) records: 10.0;// TODO:fix this stub
+ read_time= table->file->ha_scan_time(MY_MAX(records, 1000)); // Needs fix..
+ copy_cost= table->s->optimizer_costs.row_copy_cost;
}
+
found_records= records;
- cached_scan_time= read_time;
cached_scan_and_compare_time= (read_time + records *
(copy_cost + WHERE_COST_THD(thd)));
}
@@ -14996,7 +15173,7 @@ ha_rows JOIN_TAB::get_examined_rows()
}
}
else
- examined_rows= records_read;
+ examined_rows= records_init;
if (examined_rows >= (double) HA_ROWS_MAX)
return HA_ROWS_MAX;
@@ -18496,7 +18673,7 @@ table_map JOIN::get_allowed_nj_tables(uint idx)
first_alt TRUE <=> Use the LooseScan plan for the first_tab
no_jbuf_before Don't allow to use join buffering before this
table
- reopt_rec_count OUT New output record count
+ outer_rec_count OUT New output record count
reopt_cost OUT New join prefix cost
DESCRIPTION
@@ -18551,6 +18728,8 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables;
join->cur_sj_inner_tables= 0;
+ double inner_fanout= 1.0;
+
for (i= first_tab; i <= last_tab; i++)
{
JOIN_TAB *rs= join->positions[i].table;
@@ -18563,31 +18742,43 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
join->positions, i,
TRUE, rec_count,
&pos, &loose_scan_pos);
+ if ((i == first_tab && first_alt))
+ pos= loose_scan_pos;
}
else
pos= join->positions[i];
- if ((i == first_tab && first_alt))
- pos= loose_scan_pos;
-
reopt_remaining_tables &= ~rs->table->map;
- rec_count= COST_MULT(rec_count, pos.records_read);
cost= COST_ADD(cost, pos.read_time);
- //TODO: take into account join condition selectivity here
- double pushdown_cond_selectivity= 1.0;
- table_map real_table_bit= rs->table->map;
- if (join->thd->variables.optimizer_use_condition_selectivity > 1)
+
+ double records_out= pos.records_out;
+ /*
+ The (i != last_tab) is here to mimic what
+ best_extension_by_limited_search() does: do not call
+ table_after_join_selectivity() for the join_tab where the semi-join
+ strategy is applied
+ */
+ if (i != last_tab &&
+ join->thd->variables.optimizer_use_condition_selectivity > 1)
{
+ table_map real_table_bit= rs->table->map;
+ double __attribute__((unused)) pushdown_cond_selectivity;
pushdown_cond_selectivity=
table_after_join_selectivity(join, i, rs,
reopt_remaining_tables &
- ~real_table_bit);
+ ~real_table_bit, &records_out);
}
- (*outer_rec_count) *= pushdown_cond_selectivity;
- if (!rs->emb_sj_nest)
- *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read);
+ rec_count= COST_MULT(rec_count, records_out);
+ *outer_rec_count= COST_MULT(*outer_rec_count, records_out);
+ if (rs->emb_sj_nest)
+ inner_fanout= COST_MULT(inner_fanout, records_out);
}
+
+ /* Discount the fanout produced by the subquery */
+ if (inner_fanout > 1.0)
+ *outer_rec_count /= inner_fanout;
+
join->cur_sj_inner_tables= save_cur_sj_inner_tables;
*reopt_cost= cost;
@@ -20828,7 +21019,7 @@ TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM *param,
{
TABLE *table;
Create_tmp_table maker((ORDER *) NULL, false, false,
- select_options, HA_POS_ERROR);
+ select_options, HA_ROWS_MAX);
if (!(table= maker.start(thd, param, &table_alias)) ||
maker.add_schema_fields(thd, table, param, schema_table) ||
maker.finalize(thd, table, param, do_not_open, keep_row_order))
@@ -21008,7 +21199,6 @@ bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value)
return false;
}
-
bool open_tmp_table(TABLE *table)
{
int error;
@@ -21022,6 +21212,7 @@ bool open_tmp_table(TABLE *table)
}
table->db_stat= HA_OPEN_KEYFILE;
(void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
+ table->file->set_optimizer_costs(table->in_use);
if (!table->is_created())
{
table->set_created();
@@ -24702,31 +24893,40 @@ ok:
@return
MAX_KEY no suitable key found
key index otherwise
+
+  @notes
+    We should not use keyread_time() as in the case of disk_read_cost= 0
+    all keys would be regarded as equal.
*/
uint find_shortest_key(TABLE *table, const key_map *usable_keys)
{
- double min_cost= DBL_MAX;
+ size_t min_length= INT_MAX32;
uint best= MAX_KEY;
- if (!usable_keys->is_clear_all())
+ uint possible_keys= usable_keys->bits_set();
+
+ if (possible_keys)
{
+ if (possible_keys == 1)
+ return usable_keys->find_first_bit();
+
for (uint nr=0; nr < table->s->keys ; nr++)
{
if (usable_keys->is_set(nr))
{
- double cost= table->file->ha_key_scan_time(nr);
- if (cost < min_cost)
+ size_t length= table->key_storage_length(nr);
+ if (length < min_length)
{
- min_cost= cost;
- best=nr;
+ min_length= length;
+ best= nr;
}
- DBUG_ASSERT(best < MAX_KEY);
}
}
}
return best;
}
+
/**
Test if a second key is the subkey of the first one.
@@ -28244,6 +28444,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
// psergey-todo: data for filtering!
tracker= &eta->tracker;
jbuf_tracker= &eta->jbuf_tracker;
+ jbuf_unpack_tracker= &eta->jbuf_unpack_tracker;
/* Enable the table access time tracker only for "ANALYZE stmt" */
if (thd->lex->analyze_stmt)
@@ -28472,12 +28673,13 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
ha_rows examined_rows= get_examined_rows();
eta->rows_set= true;
- eta->rows= examined_rows;
+ eta->rows= double_to_rows(examined_rows);
/* "filtered" */
float f= 0.0;
if (examined_rows)
{
+#ifdef OLD_CODE // QQQ
double pushdown_cond_selectivity= cond_selectivity;
if (pushdown_cond_selectivity != 1.0)
f= (float) (100.0 * pushdown_cond_selectivity);
@@ -28485,6 +28687,9 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
f= (float) (100.0 * range_rowid_filter_info->selectivity);
else
f= (float) (100.0 * records_read / examined_rows);
+#else
+ f= (float) (100.0 * records_out / examined_rows);
+#endif
}
set_if_smaller(f, 100.0);
eta->filtered_set= true;
@@ -28880,9 +29085,9 @@ int JOIN::save_explain_data_intern(Explain_query *output,
continue;
}
-
Explain_table_access *eta= (new (output->mem_root)
- Explain_table_access(output->mem_root));
+ Explain_table_access(output->mem_root,
+ thd->lex->analyze_stmt));
if (!eta)
DBUG_RETURN(1);
@@ -29922,7 +30127,7 @@ void JOIN::cache_const_exprs()
- If there is no quick select return the full cost from
cost_for_index_read() (Doing a full scan with up to 'limit' records)
- @param pos Result from best_acccess_path(). Is NULL for
+ @param pos Result from best_access_path(). Is NULL for
single-table UPDATE/DELETE
@param table Table to be sorted
@param keynr Which index to use
@@ -30008,7 +30213,7 @@ static bool get_range_limit_read_cost(const POSITION *pos,
/*
Calculate the number of rows we have to check if we are
- doing a full index scan (as a suitabe range scan was not available).
+ doing a full index scan (as a suitable range scan was not available).
We assume that each of the tested indexes is not correlated
with ref_key. Thus, to select first N records we have to scan
@@ -30197,12 +30402,12 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
trace_cheaper_ordering.add_table_name(tab);
else
trace_cheaper_ordering.add_table_name(table);
- trace_cheaper_ordering
- .add("rows_estimation", rows_estimate)
- .add("read_cost", read_time)
- .add("filesort_cost", filesort_cost)
- .add("filesort_type", filesort_names[filesort_type].str)
- .add("fanout", fanout);
+ trace_cheaper_ordering.
+ add("rows_estimation", rows_estimate).
+ add("filesort_cost", filesort_cost).
+ add("read_cost", read_time).
+ add("filesort_type", filesort_names[filesort_type].str).
+ add("fanout", fanout);
}
Json_writer_array possible_keys(thd,"possible_keys");