summaryrefslogtreecommitdiff
path: root/sql/sql_select.cc
diff options
context:
space:
mode:
authorSergei Petrunia <sergey@mariadb.com>2022-07-19 14:13:17 +0300
committerSergei Petrunia <sergey@mariadb.com>2022-07-19 15:19:54 +0300
commitac3b3807cc9b21e23e65b1a70c7dcda1f45a3c57 (patch)
tree7ee8b4cb3c71d3bb5c0f318cc62c9ce4e735bf41 /sql/sql_select.cc
parentec4e5900d0ae36fe2bc639d8ce3174b951cfb689 (diff)
downloadmariadb-git-bb-10.10-spetrunia2.tar.gz
MDEV-28929: Plan selection takes forever with MDEV-28852 ...bb-10.10-spetrunia2bb-10.10-mdev28929-v4
Part #2: Extend heuristic pruning to use multiple tables as the "Model tables". Before the patch, heuristic pruning uses only one "Model table": The table which had the best cost AND record became the "Model table". After that, if a table's cost and record were both worse than those of the Model Table, the table would be pruned away. This didn't work well when the first table (the optimizer sorts them by record_count) had low record_count but relatively high cost: nothing could be pruned afterwards. The patch adds the two additional "Model tables": one with the least cost and the other with the least record_count. (In both cases, a table can be pruned away if BOTH its cost and record_count are worse than those of a Model table) The new pruning is active when the number of tables to consider for the prefix is higher than @@optimizer_extra_pruning_depth. One can see the new pruning in the Optimizer Trace as - "pruned_by_heuristic":"min_record_count", or - "pruned_by_heuristic":"min_read_time". Old heuristic pruning shows as "pruned_by_heuristic":1.
Diffstat (limited to 'sql/sql_select.cc')
-rw-r--r--sql/sql_select.cc88
1 files changed, 70 insertions, 18 deletions
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 1fe24e2eb1c..3db50da3009 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -109,8 +109,7 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
const key_map *keys,ha_rows limit);
static void optimize_straight_join(JOIN *join, table_map join_tables);
static bool greedy_search(JOIN *join, table_map remaining_tables,
- uint depth, uint prune_level,
- uint use_cond_selectivity);
+ uint depth, uint use_cond_selectivity);
enum enum_best_search {
SEARCH_ABORT= -2,
@@ -124,7 +123,6 @@ best_extension_by_limited_search(JOIN *join,
table_map remaining_tables,
uint idx, double record_count,
double read_time, uint depth,
- uint prune_level,
uint use_cond_selectivity,
table_map *processed_eq_ref_tables);
static uint determine_search_depth(JOIN* join);
@@ -8778,7 +8776,6 @@ bool
choose_plan(JOIN *join, table_map join_tables)
{
uint search_depth= join->thd->variables.optimizer_search_depth;
- uint prune_level= join->thd->variables.optimizer_prune_level;
uint use_cond_selectivity=
join->thd->variables.optimizer_use_condition_selectivity;
bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
@@ -8786,6 +8783,9 @@ choose_plan(JOIN *join, table_map join_tables)
DBUG_ENTER("choose_plan");
join->cur_embedding_map= 0;
+ join->extra_heuristic_pruning= false;
+ join->prune_level= join->thd->variables.optimizer_prune_level;
+
reset_nj_counters(join, join->join_list);
qsort2_cmp jtab_sort_func;
@@ -8842,8 +8842,14 @@ choose_plan(JOIN *join, table_map join_tables)
if (search_depth == 0)
/* Automatically determine a reasonable value for 'search_depth' */
search_depth= determine_search_depth(join);
- if (greedy_search(join, join_tables, search_depth, prune_level,
- use_cond_selectivity))
+
+ if (join->prune_level >= 1 &&
+ search_depth >= thd->variables.optimizer_extra_pruning_depth)
+ {
+ join->extra_heuristic_pruning= true;
+ }
+
+ if (greedy_search(join, join_tables, search_depth, use_cond_selectivity))
DBUG_RETURN(TRUE);
}
@@ -9231,8 +9237,6 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
for the query
@param remaining_tables set of tables not included into the partial plan yet
@param search_depth controlls the exhaustiveness of the search
- @param prune_level the pruning heuristics that should be applied during
- search
@param use_cond_selectivity specifies how the selectivity of the conditions
pushed to a table should be taken into account
@@ -9246,7 +9250,6 @@ static bool
greedy_search(JOIN *join,
table_map remaining_tables,
uint search_depth,
- uint prune_level,
uint use_cond_selectivity)
{
double record_count= 1.0;
@@ -9280,7 +9283,6 @@ greedy_search(JOIN *join,
if ((int) best_extension_by_limited_search(join, remaining_tables, idx,
record_count,
read_time, search_depth,
- prune_level,
use_cond_selectivity,
&eq_ref_tables) <
(int) SEARCH_OK)
@@ -10159,8 +10161,7 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
When 'best_extension_by_limited_search' is called for the first time,
'join->best_read' must be set to the largest possible value (e.g. DBL_MAX).
The actual implementation provides a way to optionally use pruning
- heuristic (controlled by the parameter 'prune_level') to reduce the search
- space by skipping some partial plans.
+ heuristic to reduce the search space by skipping some partial plans.
@note
The parameter 'search_depth' provides control over the recursion
@@ -10179,8 +10180,6 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
@param search_depth maximum depth of the recursion and thus size of the
found optimal plan
(0 < search_depth <= join->tables+1).
- @param prune_level pruning heuristics that should be applied during
- optimization
(values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
@param use_cond_selectivity specifies how the selectivity of the conditions
pushed to a table should be taken into account
@@ -10203,7 +10202,6 @@ best_extension_by_limited_search(JOIN *join,
double record_count,
double read_time,
uint search_depth,
- uint prune_level,
uint use_cond_selectivity,
table_map *processed_eq_ref_tables)
{
@@ -10270,7 +10268,7 @@ best_extension_by_limited_search(JOIN *join,
Json_writer_array arr(thd, "get_costs_for_tables");
- if (idx > join->const_tables && prune_level >= 2 &&
+ if (idx > join->const_tables && join->prune_level >= 2 &&
join->positions[idx-1].type == JT_EQ_REF &&
(join->eq_ref_tables & allowed_current_tables))
{
@@ -10312,6 +10310,12 @@ best_extension_by_limited_search(JOIN *join,
join->sort_positions + join->sort_space);
accepted_tables= 0;
+ double min_rec_count= DBL_MAX;
+ double min_rec_count_read_time= DBL_MAX;
+
+ double min_cost= DBL_MAX;
+ double min_cost_record_count= DBL_MAX;
+
for (SORT_POSITION *pos= sort ; pos < sort_end ; pos++)
{
s= *pos->join_tab;
@@ -10375,8 +10379,31 @@ best_extension_by_limited_search(JOIN *join,
Prune some less promising partial plans. This heuristic may miss
the optimal QEPs, thus it results in a non-exhaustive search.
*/
- if (prune_level >= 1)
+ if (join->prune_level >= 1)
{
+ // Collect the members with min_cost and min_read_time.
+ bool min_rec_hit= false;
+ bool min_cost_hit= false;
+
+ if (join->extra_heuristic_pruning &&
+ (!(position->key_dependent & allowed_tables) ||
+ position->records_read < 2.0))
+ {
+ if (current_record_count < min_rec_count)
+ {
+ min_rec_count= current_record_count;
+ min_rec_count_read_time= current_read_time;
+ min_rec_hit= true;
+ }
+
+ if (current_read_time < min_cost)
+ {
+ min_cost_record_count= current_record_count;
+ min_cost= current_read_time;
+ min_cost_hit= true;
+ }
+ }
+
if (best_record_count > current_record_count ||
best_read_time > current_read_time ||
(idx == join->const_tables && // 's' is the first table in the QEP
@@ -10401,6 +10428,13 @@ best_extension_by_limited_search(JOIN *join,
}
else
{
+ /*
+ Typically, we get here if:
+ best_record_count < current_record_count &&
+ best_read_time < current_read_time
+ That is, both record_count and read_time are worse than the best_
+ ones. This plan doesn't look promising, prune it away.
+ */
DBUG_EXECUTE("opt", print_plan(join, idx+1,
current_record_count,
read_time,
@@ -10411,6 +10445,25 @@ best_extension_by_limited_search(JOIN *join,
restore_prev_sj_state(remaining_tables, s, idx);
continue;
}
+
+ const char* prune_reason= NULL;
+ if (!min_rec_hit &&
+ current_record_count >= min_rec_count &&
+ current_read_time >= min_rec_count_read_time)
+ prune_reason= "min_record_count";
+
+ if (!min_cost_hit &&
+ current_record_count >= min_cost_record_count &&
+ current_read_time >= min_cost)
+ prune_reason= "min_read_time";
+
+ if (prune_reason)
+ {
+ trace_one_table.add("pruned_by_heuristic", prune_reason);
+ restore_prev_nj_state(s);
+ restore_prev_sj_state(remaining_tables, s, idx);
+ continue;
+ }
}
double pushdown_cond_selectivity= 1.0;
@@ -10448,7 +10501,6 @@ best_extension_by_limited_search(JOIN *join,
partial_join_cardinality,
current_read_time,
search_depth - 1,
- prune_level,
use_cond_selectivity,
&found_eq_ref_tables);
swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab);