Added 'records_out' and join_type to POSITION

records_out is the numbers of rows expected to be accepted from a table. records_read is in contrast the number of rows that the optimizer excepts to read from the engine. This patch causes not plan changes. The differences in test results comes from renaming "records" to "records_read" and printing of record_out in the optimizer trace. Other things: - Renamed table_cond_selectivity() to table_after_join_selectivity() to make the purpose of the function more clear.
author: Monty <monty@mariadb.org> 2022-04-11 17:59:36 +0300
committer: Sergei Petrunia <sergey@mariadb.com> 2023-02-02 22:25:24 +0300
commit: 2387ee9b4556cf5f4730ea7033a30977273173ad (patch)
tree: 5c518a9ef3b9ec80c1533b371137aa9344c23f92 /sql
parent: 9db877c9ec6fb048e213cc824643ed7f074d9e40 (diff)
download: mariadb-git-2387ee9b4556cf5f4730ea7033a30977273173ad.tar.gz
7 files changed, 109 insertions, 39 deletions
diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc
index 744959eb164..374fc41aba8 100644
--- a/sql/opt_trace.cc
+++ b/sql/opt_trace.cc
@@ -689,15 +689,15 @@ void print_final_join_order(JOIN *join)
 }
 
 
-void print_best_access_for_table(THD *thd, POSITION *pos,
-                                 enum join_type type)
+void print_best_access_for_table(THD *thd, POSITION *pos)
 {
   DBUG_ASSERT(thd->trace_started());
 
   Json_writer_object obj(thd, "chosen_access_method");
   obj.
-    add("type", type == JT_ALL ? "scan" : join_type_str[type]).
-    add("records", pos->records_read).
+    add("type", pos->type == JT_ALL ? "scan" : join_type_str[pos->type]).
+    add("records_read", pos->records_read).
+    add("records_out", pos->records_out).
     add("cost", pos->read_time).
     add("uses_join_buffering", pos->use_join_buffer);
   if (pos->range_rowid_filter_info)
diff --git a/sql/opt_trace.h b/sql/opt_trace.h
index 1ee23a33591..a43c1dde54b 100644
--- a/sql/opt_trace.h
+++ b/sql/opt_trace.h
@@ -109,8 +109,7 @@ void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex,
 void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab);
 void trace_plan_prefix(JOIN *join, uint idx, table_map join_tables);
 void print_final_join_order(JOIN *join);
-void print_best_access_for_table(THD *thd, POSITION *pos,
-                                 enum join_type type);
+void print_best_access_for_table(THD *thd, POSITION *pos);
 
 void trace_condition(THD * thd, const char *name, const char *transform_type,
                     Item *item, const char *table_name= nullptr);
diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc
index 1926b43188a..c0f7fe0755a 100644
--- a/sql/rowid_filter.cc
+++ b/sql/rowid_filter.cc
@@ -460,10 +460,16 @@ void Range_rowid_filter_cost_info::trace_info(THD *thd)
   @brief
     Choose the best range filter for the given access of the table
 
-  @param access_key_no    The index by which the table is accessed
-  @param records   The estimated total number of key tuples with this access
-  @param access_cost_factor the cost of a random seek to access the table
-
+  @param access_key_no      The index by which the table is accessed
+  @param records            The estimated total number of key tuples with
+                            this access
+  @param fetch_cost_factor  The cost of fetching 'records' rows
+  @param index_only_cost    The cost of fetching 'records' rows with
+                            index only reads
+  @param prev_records       How many row combinations we have in
+                            preceding tables
+  @parma records_out        Will be updated to the minimum result rows for any
+                            usable filter.
   @details
     The function looks through the array of cost info for range filters
     and chooses the element for the range filter that promise the greatest
@@ -478,7 +484,8 @@ TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
                                                 double records,
                                                 double fetch_cost,
                                                 double index_only_cost,
-                                                double prev_records)
+                                                double prev_records,
+                                                double *records_out)
 {
   if (range_rowid_filter_cost_info_elems == 0 ||
       covering_keys.is_set(access_key_no))
@@ -521,13 +528,14 @@ TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
       continue;
 
     new_records= records * filter->selectivity;
+    set_if_smaller(*records_out, new_records);
     cost_of_accepted_rows= fetch_cost * filter->selectivity;
     cost_of_rejected_rows= index_only_cost * (1 - filter->selectivity);
     new_cost= (cost_of_accepted_rows + cost_of_rejected_rows +
                records * filter->lookup_cost());
     new_total_cost= ((new_cost + new_records *
-                      in_use->variables.optimizer_where_cost) * prev_records +
-                     filter->get_setup_cost());
+                      in_use->variables.optimizer_where_cost) *
+                     prev_records + filter->get_setup_cost());
 
     if (best_filter_gain > new_total_cost)
     {
diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h
index dc64dc876ce..f7386631c3d 100644
--- a/sql/rowid_filter.h
+++ b/sql/rowid_filter.h
@@ -491,7 +491,8 @@ public:
                                                   double records,
                                                   double fetch_cost,
                                                   double index_only_cost,
-                                                  double prev_records);
+                                                  double prev_records,
+                                                  double *records_out);
   Range_rowid_filter_cost_info *
     apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
                  double *startup_cost, double fetch_cost,
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7060da031a7..11c3299a8d8 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -89,6 +89,18 @@
 */
 #define HASH_FANOUT 0.1
 
+/*
+  The following is used to check that A <= B, but with some margin as the
+  calculation is done slightly differently (mathematically correct, but
+  double calculations are not exact).
+  This is only used when comparing read rows and output rows, which
+  means that we can assume that both values are >= 0 and B cannot be notable
+  smaller than A.
+*/
+
+#define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) <  1.0000001)
+
+
 /* Cost for reading a row through an index */
 struct INDEX_READ_COST
 {
@@ -318,7 +330,7 @@ static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
 static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *,
                                List<Item> &, List<Item> &, bool, bool, bool);
 
-static double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
+static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
                                      table_map rem_tables);
 void set_postjoin_aggr_write_func(JOIN_TAB *tab);
 
@@ -421,7 +433,7 @@ bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value)
 POSITION::POSITION()
 {
   table= 0;
-  records_read= cond_selectivity= read_time= 0.0;
+  records_read= cond_selectivity= read_time= records_out= 0.0;
   prefix_record_count= 0.0;
   key= 0;
   use_join_buffer= 0;
@@ -5818,6 +5830,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
         Json_writer_object table_records(thd);
         /* Only one matching row */
         s->found_records= s->records= 1;
+        s->records_out= 1.0;
         s->read_time=1.0;
         s->worst_seeks=1.0;
         table_records.add_table_name(s)
@@ -7686,6 +7699,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
   join->positions[idx].table= table;
   join->positions[idx].key=key;
   join->positions[idx].records_read=1.0;	/* This is a const table */
+  join->positions[idx].records_out=1.0;	        /* This is a const table */
   join->positions[idx].cond_selectivity= 1.0;
   join->positions[idx].ref_depend_map= 0;
 
@@ -8022,6 +8036,7 @@ best_access_path(JOIN      *join,
   my_bool found_constraint= 0;
   double best_cost=         DBL_MAX;
   double records=           DBL_MAX;
+  double records_out=       table->stat_records() * table->cond_selectivity;
   table_map best_ref_depends_map= 0;
   /*
     key_dependent is 0 if all key parts could be used or if there was an
@@ -8317,9 +8332,12 @@ best_access_path(JOIN      *join,
                    (1.0 +
                     ((double) (table->s->max_key_length-keyinfo->key_length) /
                      (double) table->s->max_key_length)));
+                set_if_smaller(records, (double)s->records);
+                set_if_smaller(records_out, records);
                 if (records < 2.0)
                   records=2.0;               /* Can't be as good as a unique */
               }
+
               /*
                 ReuseRangeEstimateForRef-2:  We get here if we could not reuse
                 E(#rows) from range optimizer. Make another try:
@@ -8357,9 +8375,10 @@ best_access_path(JOIN      *join,
               }
             }
             /* Calculate the cost of the index access */
-            INDEX_READ_COST cost= cost_for_index_read(thd, table, key,
-                                                      (ha_rows) records,
-                                                      (ha_rows) s->worst_seeks);
+            INDEX_READ_COST cost=
+              cost_for_index_read(thd, table, key,
+                                  (ha_rows) records,
+                                  (ha_rows) s->worst_seeks);
             tmp= cost.read_cost;
             index_only_cost= cost.index_only_cost;
           }
@@ -8631,7 +8650,8 @@ best_access_path(JOIN      *join,
                                                           records,
                                                           tmp,
                                                           index_only_cost,
-                                                          record_count);
+                                                          record_count,
+                                                          &records_out);
         if (filter)
           filter= filter->apply_filter(thd, table, &tmp, &records_after_filter,
                                        &startup_cost,
@@ -8673,6 +8693,7 @@ best_access_path(JOIN      *join,
           add("chosen", false).
           add("cause", cause ? cause : "cost");
       }
+      set_if_smaller(records_out, records);
     } /* for each key */
     records= best_records;
   }
@@ -8726,6 +8747,8 @@ best_access_path(JOIN      *join,
     /* Estimate the cost of the hash join access to the table */
     double rnd_records= matching_candidates_in_table(s, found_constraint,
                                                      use_cond_selectivity);
+    set_if_smaller(records_out, rnd_records);
+
     /*
       The following cost calculation is identical to the cost calculation for
       the join cache later on, except for the HASH_FANOUT
@@ -8876,7 +8899,8 @@ best_access_path(JOIN      *join,
         table->best_range_rowid_filter_for_partial_join(key_no, rows2double(range->rows),
                                                         range->find_cost,
                                                         range->index_only_cost,
-                                                        record_count);
+                                                        record_count,
+                                                        &records_out);
         if (filter)
         {
           double filter_cost= range->fetch_cost;
@@ -8905,6 +8929,7 @@ best_access_path(JOIN      *join,
       {
         type= JT_INDEX_MERGE;
       }
+      set_if_smaller(records_out, records_after_filter);
       loose_scan_opt.check_range_access(join, idx, s->quick);
     }
     else
@@ -8913,7 +8938,10 @@ best_access_path(JOIN      *join,
       rnd_records= matching_candidates_in_table(s, found_constraint,
                                                 use_cond_selectivity);
       records_after_filter= rnd_records;
+      set_if_smaller(records_out, rnd_records);
+
       org_records= rows2double(s->records);
+
       DBUG_ASSERT(rnd_records <= s->records);
 
       /* Estimate cost of reading table. */
@@ -9043,7 +9071,9 @@ best_access_path(JOIN      *join,
   }
 
   /* Update the cost information for the current partial plan */
+  crash_if_first_double_is_bigger(records_out, records);
   pos->records_read= records;
+  pos->records_out=  records_out;
   pos->read_time=    best_cost;
   pos->key=          best_key;
   pos->type=         best_type;
@@ -9070,7 +9100,7 @@ best_access_path(JOIN      *join,
   trace_paths.end();
 
   if (unlikely(thd->trace_started()))
-    print_best_access_for_table(thd, pos, best_type);
+    print_best_access_for_table(thd, pos);
 
   DBUG_VOID_RETURN;
 }
@@ -9575,8 +9605,8 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
     remaining_tables&= ~(s->table->map);
     double pushdown_cond_selectivity= 1.0;
     if (use_cond_selectivity > 1)
-      pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
-                                                        remaining_tables);
+      pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s,
+                                                              remaining_tables);
     position->cond_selectivity= pushdown_cond_selectivity;
     ++idx;
   }
@@ -10113,8 +10143,8 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
 */
 
 static
-double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
-                              table_map rem_tables)
+double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
+                                    table_map rem_tables)
 {
   uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
   uint   ref_keyuse_size= MAX_REF_PARTS;
@@ -10890,9 +10920,10 @@ best_extension_by_limited_search(JOIN      *join,
 
       pushdown_cond_selectivity= 1.0;
       if (use_cond_selectivity > 1)
-        pushdown_cond_selectivity= table_cond_selectivity(join, idx, s,
-                                                          remaining_tables &
-                                                          ~real_table_bit);
+        pushdown_cond_selectivity=
+          table_after_join_selectivity(join, idx, s,
+                                       remaining_tables & ~real_table_bit);
+
       join->positions[idx].cond_selectivity= pushdown_cond_selectivity;
 
       partial_join_cardinality= (current_record_count *
@@ -10903,8 +10934,9 @@ best_extension_by_limited_search(JOIN      *join,
           .add("selectivity", pushdown_cond_selectivity)
           .add("estimated_join_cardinality", partial_join_cardinality);
 
-      if ((search_depth > 1) &&
+      if (search_depth > 1 &&
           ((remaining_tables & ~real_table_bit) & allowed_tables))
+
       {
         /* Recursively expand the current partial plan */
         Json_writer_array trace_rest(thd, "rest_of_plan");
@@ -11715,6 +11747,7 @@ bool JOIN::get_best_combination()
       */
       SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
       j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0);
+      j->records_out= j->records_read;
       j->records= (ha_rows) j->records_read;
       j->cond_selectivity= 1.0;
       JOIN_TAB *jt;
@@ -11770,8 +11803,15 @@ bool JOIN::get_best_combination()
       to access join->best_positions[]. 
     */
     j->records_read= best_positions[tablenr].records_read;
+    j->records_out=  best_positions[tablenr].records_out;
     j->cond_selectivity= best_positions[tablenr].cond_selectivity;
     DBUG_ASSERT(j->cond_selectivity <= 1.0);
+    crash_if_first_double_is_bigger(j->records_out,
+                                    j->records_read *
+                                    (j->range_rowid_filter_info ?
+                                     j->range_rowid_filter_info->selectivity :
+                                     1.0));
+
     map2table[j->table->tablenr]= j;
 
     /* If we've reached the end of sjm nest, switch back to main sequence */
@@ -13071,7 +13111,12 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
 
 	    /* Fix for EXPLAIN */
 	    if (sel->quick)
-	      join->best_positions[i].records_read= (double)sel->quick->records;
+            {
+	      join->best_positions[i].records_read=
+                (double) sel->quick->records;
+              set_if_smaller(join->best_positions[i].records_out,
+                             join->best_positions[i].records_read);
+            }
 	  }
 	  else
 	  {
@@ -18542,9 +18587,10 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
     table_map real_table_bit= rs->table->map;
     if (join->thd->variables.optimizer_use_condition_selectivity > 1)
     {
-      pushdown_cond_selectivity= table_cond_selectivity(join, i, rs,
-                                                        reopt_remaining_tables &
-                                                        ~real_table_bit);
+      pushdown_cond_selectivity=
+        table_after_join_selectivity(join, i, rs,
+                                     reopt_remaining_tables &
+                                     ~real_table_bit);
     }
     (*outer_rec_count) *= pushdown_cond_selectivity;
     if (!rs->emb_sj_nest)
@@ -22673,7 +22719,7 @@ join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
     {						// Info for DESCRIBE
       tab->info= ET_CONST_ROW_NOT_FOUND;
       /* Mark for EXPLAIN that the row was not found */
-      pos->records_read=0.0;
+      pos->records_read= pos->records_out= 0.0;
       pos->ref_depend_map= 0;
       if (!table->pos_in_table_list->outer_join || error > 0)
 	DBUG_RETURN(error);
@@ -22691,7 +22737,7 @@ join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
     {
       tab->info= ET_UNIQUE_ROW_NOT_FOUND;
       /* Mark for EXPLAIN that the row was not found */
-      pos->records_read=0.0;
+      pos->records_read= pos->records_out= 0.0;
       pos->ref_depend_map= 0;
       if (!table->pos_in_table_list->outer_join || error > 0)
 	DBUG_RETURN(error);
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 106746b35de..ec13eecd4d6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -343,7 +343,10 @@ typedef struct st_join_table {
   
   /* Copy of POSITION::records_read, set by get_best_combination() */
   double        records_read;
-  
+
+  /* Copy of POSITION::records_out, set by get_best_combination() */
+  double        records_out;
+
   /* The selectivity of the conditions that can be pushed to the table */ 
   double        cond_selectivity;  
   
@@ -939,11 +942,22 @@ public:
   JOIN_TAB *table;
 
   /*
+    The number of rows that will be read from the table
+  */
+  double records_read;
+
+  /*
     The "fanout": number of output rows that will be produced (after
     pushed down selection condition is applied) per each row combination of
     previous tables.
+
+    This takes into account table->cond_selectivity, the WHERE clause
+    related to this table calculated in
+    calculate_cond_selectivity_for_table(), and the used rowid filter but
+    does not take into account the WHERE clause involving preceding tables
+    calculated in table_after_join_selectivity().
   */
-  double records_read;
+  double records_out;
 
   /* The selectivity of the pushed down conditions */
   double cond_selectivity;
@@ -1007,6 +1021,7 @@ public:
 
   /* Type of join (EQ_REF, REF etc) */
   enum join_type type;
+
   /*
     Valid only after fix_semijoin_strategies_for_picked_join_order() call:
     if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
diff --git a/sql/table.h b/sql/table.h
index bd1816175df..4350c33d389 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1810,7 +1810,8 @@ public:
                                            double records,
                                            double fetch_cost,
                                            double index_only_cost,
-                                           double prev_records);
+                                           double prev_records,
+                                           double *records_out);
   /**
     System Versioning support
    */
author	Monty <monty@mariadb.org>	2022-04-11 17:59:36 +0300
committer	Sergei Petrunia <sergey@mariadb.com>	2023-02-02 22:25:24 +0300
commit	2387ee9b4556cf5f4730ea7033a30977273173ad (patch)
tree	5c518a9ef3b9ec80c1533b371137aa9344c23f92 /sql
parent	9db877c9ec6fb048e213cc824643ed7f074d9e40 (diff)
download	mariadb-git-2387ee9b4556cf5f4730ea7033a30977273173ad.tar.gz