author     Monty <monty@mariadb.org>             2022-09-30 17:10:37 +0300
committer  Sergei Petrunia <sergey@mariadb.com>  2023-02-02 23:57:30 +0300
commit     d9d0e78039fd3fbeac814edd27fabfe3e4450bc5 (patch)
tree       8881fa18f520319e112fc0b3a9b3a00a0eb56737
parent     7afa819f727144e8a107e28444e07d54045ab38e (diff)
download   mariadb-git-d9d0e78039fd3fbeac814edd27fabfe3e4450bc5.tar.gz
Add limits for how many IO operations a table access will do
This solves the current problem in the optimizer:

- SELECT FROM big_table
- SELECT from small_table where small_table.eq_ref_key=big_table.id

The old code assumed that each eq_ref access will cause an IO. As the cost
of IO is high, this dominated the cost for the latter table, which caused
the optimizer to prefer table scans + join cache over index reads.

This patch fixes the issue by limiting the number of expected IO calls, for
rows and index separately, to the size of the table or index, or to the
number of accesses that we expect in a range for the index.

The major changes are:

- Adding a new structure ALL_READ_COST that is mainly used in
  best_access_path() to hold the cost parts of the cost we are calculating.
  This allows us to limit the number of IO when multiplying the cost with
  the previous row combinations.
- All storage engine cost functions are changed to return IO_AND_CPU_COST.
  The virtual cost functions should now return in IO_AND_CPU_COST.io the
  number of disk blocks that will be accessed, instead of the cost of the
  access.
- We are not limiting the io_blocks for table or index scans, as we assume
  that engines may not store these in the 'hot' part of the cache. Table
  and index scans also use far fewer IO blocks than key accesses, so the
  original issue is not as critical with scans.

Other things:

- OPT_RANGE now holds a 'Cost_estimate cost' instead of a lot of different
  costs. All the old costs, like index_only_read, can be extracted from
  'cost'.
- Added to the start of some functions 'handler *file= table->file' to
  shorten the code that is using the handler.
- handler->cost() is used to convert an ALL_READ_COST or IO_AND_CPU_COST
  to 'cost in milliseconds'.
- New functions: handler::index_blocks() and handler::row_blocks(), which
  are used to limit the IO.
- Added index_cost and row_cost to Cost_estimate and removed all members
  that were no longer needed.
- Removed the cost coefficients from Cost_estimate as these don't make
  sense when costs (except IO_BLOCKS) are in milliseconds.
- Removed handler::avg_io_cost() and replaced it with DISK_READ_COST.
- Renamed best_range_rowid_filter_for_partial_join() to
  best_range_rowid_filter() as the old name made rows too long.
- Changed all SJ_MATERIALIZATION_INFO 'Cost_estimate' variables to 'double'
  as the power of Cost_estimate was not used for these and thus just caused
  storage and performance overhead.
- Changed cost_for_index_read() to use 'worst_seeks' to only limit IO, not
  the number of table accesses. With this patch worst_seeks is probably not
  needed anymore, but I kept it around just in case.
- Applying the cost for a filter got much shorter and easier thanks to the
  API changes.
- Adjusted the cost for fulltext keys in collaboration with Sergei
  Golubchik.
- Most test changes caused by this patch are that table scans are changed
  to use indexes.
- Added ha_seq::keyread_time() and ha_seq::key_scan_time() to make checking
  the number of potential IO blocks easier during debugging.
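The capping idea can be sketched as follows (illustrative only; it mirrors
the ALL_READ_COST struct and handler::cost_for_reading_multiple_times()
added in sql/handler.h below, with simplified types and a free function
instead of a handler method):

  #include <algorithm>

  struct IO_AND_CPU_COST { double io, cpu; };

  struct ALL_READ_COST
  {
    IO_AND_CPU_COST index_cost, row_cost;
    long long max_index_blocks, max_row_blocks;
    double copy_cost;
  };

  /*
    Cost of repeating one access 'multiple' times: the CPU part scales
    linearly, but the IO part is capped by the number of blocks in the
    index and table, so probing a small eq_ref table a million times is
    no longer charged a million disk reads.
  */
  double capped_read_cost(double multiple, const ALL_READ_COST *c,
                          double disk_read_cost, double disk_read_ratio)
  {
    double blocks=
      std::min(c->index_cost.io * multiple, (double) c->max_index_blocks) +
      std::min(c->row_cost.io * multiple, (double) c->max_row_blocks);
    return ((c->index_cost.cpu + c->row_cost.cpu + c->copy_cost) * multiple +
            blocks * disk_read_cost * disk_read_ratio);
  }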
-rw-r--r--  sql/filesort_utils.cc                   10
-rw-r--r--  sql/ha_partition.cc                      4
-rw-r--r--  sql/handler.cc                          46
-rw-r--r--  sql/handler.h                          329
-rw-r--r--  sql/multi_range_read.cc                 83
-rw-r--r--  sql/opt_range.cc                        75
-rw-r--r--  sql/opt_subselect.cc                    20
-rw-r--r--  sql/opt_subselect.h                      9
-rw-r--r--  sql/optimizer_defaults.h                 2
-rw-r--r--  sql/rowid_filter.cc                      9
-rw-r--r--  sql/rowid_filter.h                      20
-rw-r--r--  sql/sql_class.h                          6
-rw-r--r--  sql/sql_select.cc                      366
-rw-r--r--  sql/sql_test.cc                          2
-rw-r--r--  sql/table.cc                            13
-rw-r--r--  sql/table.h                             36
-rw-r--r--  sql/uniques.cc                           5
-rw-r--r--  storage/connect/ha_connect.h             2
-rw-r--r--  storage/csv/ha_tina.h                    6
-rw-r--r--  storage/example/ha_example.h             6
-rw-r--r--  storage/federated/ha_federated.h        16
-rw-r--r--  storage/federatedx/ha_federatedx.h      12
-rw-r--r--  storage/heap/ha_heap.h                   1
-rw-r--r--  storage/innobase/handler/ha_innodb.cc    4
-rw-r--r--  storage/myisammrg/ha_myisammrg.h         2
-rw-r--r--  storage/rocksdb/ha_rocksdb.cc            9
-rw-r--r--  storage/rocksdb/ha_rocksdb.h             5
-rw-r--r--  storage/sequence/sequence.cc            23
-rw-r--r--  storage/sphinx/ha_sphinx.h               2
29 files changed, 605 insertions, 518 deletions
diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc
index 1aa17deb16e..e1cd7c566bb 100644
--- a/sql/filesort_utils.cc
+++ b/sql/filesort_utils.cc
@@ -237,9 +237,10 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows,
if (queue_size < num_available_keys)
{
+ handler *file= param->sort_form->file;
costs[PQ_SORT_ORDER_BY_FIELDS]=
get_pq_sort_cost(num_rows, queue_size, false) +
- param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows));
+ file->cost(file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows)));
}
/* Calculate cost with addon fields */
@@ -270,14 +271,15 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param,
costs[MERGE_SORT_ORDER_BY_FIELDS]= DBL_MAX;
if (num_available_keys)
+ {
+ handler *file= param->sort_form->file;
costs[MERGE_SORT_ORDER_BY_FIELDS]=
get_merge_many_buffs_cost_fast(num_rows, num_available_keys,
row_length, DEFAULT_KEY_COMPARE_COST,
default_optimizer_costs.disk_read_cost,
false) +
- param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows,
- num_rows));
-
+ file->cost(file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows, num_rows)));
+ }
if (with_addon_fields)
{
/* Compute cost of merge sort *if* we strip addon fields. */
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 30c1832fadc..08e3b45e02b 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -6629,7 +6629,7 @@ ha_rows ha_partition::multi_range_read_info_const(uint keyno,
ha_rows tmp_rows;
uint tmp_mrr_mode;
m_mrr_buffer_size[i]= 0;
- part_cost.reset();
+ part_cost.reset(*file);
tmp_mrr_mode= *mrr_mode;
tmp_rows= (*file)->
multi_range_read_info_const(keyno, &m_part_seq_if,
@@ -6680,7 +6680,7 @@ ha_rows ha_partition::multi_range_read_info(uint keyno, uint n_ranges,
{
ha_rows tmp_rows;
m_mrr_buffer_size[i]= 0;
- part_cost.reset();
+ part_cost.reset(*file);
if ((tmp_rows= (*file)->multi_range_read_info(keyno, n_ranges, keys,
key_parts,
&m_mrr_buffer_size[i],
diff --git a/sql/handler.cc b/sql/handler.cc
index a9c832c9274..7a8e4d6b036 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -3283,6 +3283,23 @@ LEX_CSTRING *handler::engine_name()
return hton_name(ht);
}
+
+/*
+  Calculate the number of index blocks we are going to access when
+  doing 'ranges' index dives, reading a total of 'rows' rows.
+*/
+
+ulonglong handler::index_blocks(uint index, uint ranges, ha_rows rows)
+{
+ if (!stats.block_size)
+ return 0; // No disk storage
+ size_t len= table->key_storage_length(index);
+ ulonglong blocks= (rows * len / INDEX_BLOCK_FILL_FACTOR_DIV *
+ INDEX_BLOCK_FILL_FACTOR_MUL) / stats.block_size + ranges;
+ return blocks * stats.block_size / IO_SIZE;
+}
+
+
/*
Calculate cost for an index scan for given index and number of records.
@@ -3329,7 +3346,7 @@ IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows,
else
io_blocks= blocks * stats.block_size / IO_SIZE;
}
- cost.io= (double) io_blocks * avg_io_cost();
+ cost.io= (double) io_blocks;
cost.cpu= blocks * INDEX_BLOCK_COPY_COST;
return cost;
}
@@ -3342,36 +3359,35 @@ IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows,
in which case there should be an additional rnd_pos_time() cost.
*/
-double handler::ha_keyread_time(uint index, ulong ranges, ha_rows rows,
- ulonglong blocks)
+IO_AND_CPU_COST handler::ha_keyread_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks)
{
if (rows < ranges)
rows= ranges;
IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks);
- return (cost.io * DISK_READ_RATIO +
- cost.cpu + ranges * KEY_LOOKUP_COST +
- (rows - ranges) * KEY_NEXT_FIND_COST);
+ cost.cpu+= ranges * KEY_LOOKUP_COST + (rows - ranges) * KEY_NEXT_FIND_COST;
+ return cost;
}
/*
- Read a row from a clustered index
+ Read rows from a clustered index
- Cost is similar to ha_rnd_pos_call_time() as a index_read() on a clusterd
+ Cost is similar to ha_rnd_pos_call_time() as an index_read() on a clustered
key has identical code as rnd_pos() (At least in InnoDB:)
*/
-double handler::ha_keyread_clustered_and_copy_time(uint index, ulong ranges,
- ha_rows rows,
- ulonglong blocks)
+IO_AND_CPU_COST
+handler::ha_keyread_clustered_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks)
{
if (rows < ranges)
rows= ranges;
IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks);
- return (cost.io * DISK_READ_RATIO +
- cost.cpu + ranges * ROW_LOOKUP_COST +
- (rows - ranges) * ROW_NEXT_FIND_COST +
- rows * ROW_COPY_COST);
+ cost.cpu+= (ranges * ROW_LOOKUP_COST + (rows - ranges) * ROW_NEXT_FIND_COST);
+ return cost;
}
THD *handler::ha_thd(void) const
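For intuition, a worked evaluation of the new handler::index_blocks()
above. Every number is assumed for illustration (a 4/3 fill factor and an
IO_SIZE of 4096; the real constants may differ):

  /* rows= 1000, key_storage_length= 16, stats.block_size= 8192, ranges= 1 */
  ulonglong blocks= (1000ULL * 16 / 3 * 4) / 8192 + 1;  /* = 2 + 1 = 3 */
  ulonglong io_units= blocks * 8192 / 4096;             /* = 6 IO_SIZE blocks */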
diff --git a/sql/handler.h b/sql/handler.h
index 9ad868a7692..01998189921 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -2777,25 +2777,50 @@ typedef struct st_range_seq_if
typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info);
+
+/* Separated costs for IO and CPU */
+
+struct IO_AND_CPU_COST
+{
+ double io;
+ double cpu;
+
+ void add(IO_AND_CPU_COST cost)
+ {
+ io+= cost.io;
+ cpu+= cost.cpu;
+ }
+};
+
+/* Cost for reading a row through an index */
+struct ALL_READ_COST
+{
+ IO_AND_CPU_COST index_cost, row_cost;
+ longlong max_index_blocks, max_row_blocks;
+ /* index_only_read = index_cost + copy_cost */
+ double copy_cost;
+
+ void reset()
+ {
+ row_cost= {0,0};
+ index_cost= {0,0};
+ max_index_blocks= max_row_blocks= 0;
+ copy_cost= 0.0;
+ }
+};
+
+
class Cost_estimate
{
public:
- double io_count; /* number of I/O to fetch records */
double avg_io_cost; /* cost of an average I/O oper. to fetch records */
- double idx_io_count; /* number of I/O to read keys */
- double idx_avg_io_cost; /* cost of an average I/O oper. to fetch records */
- double cpu_cost; /* Cost of reading the rows based on a key */
- double idx_cpu_cost; /* Cost of reading the key from the index tree */
- double import_cost; /* cost of remote operations */
+ double cpu_cost; /* Cpu cost unrelated to engine costs */
double comp_cost; /* Cost of comparing found rows with WHERE clause */
double copy_cost; /* Copying the data to 'record' */
- double mem_cost; /* cost of used memory */
double limit_cost; /* Total cost when restricting rows with limit */
- static constexpr double IO_COEFF= 1;
- static constexpr double CPU_COEFF= 1;
- static constexpr double MEM_COEFF= 1;
- static constexpr double IMPORT_COEFF= 1;
+ IO_AND_CPU_COST index_cost;
+ IO_AND_CPU_COST row_cost;
Cost_estimate()
{
@@ -2809,103 +2834,67 @@ public:
double total_cost() const
{
- return IO_COEFF*io_count*avg_io_cost +
- IO_COEFF*idx_io_count*idx_avg_io_cost +
- CPU_COEFF*(cpu_cost + idx_cpu_cost + comp_cost + copy_cost) +
- MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+ DBUG_ASSERT(avg_io_cost != 0.0 || index_cost.io + row_cost.io == 0);
+ return ((index_cost.io + row_cost.io) * avg_io_cost+
+ index_cost.cpu + row_cost.cpu + comp_cost + copy_cost +
+ cpu_cost);
}
- /*
- Cost of fetching a key and use the key to find a row (if not clustered or
- covering key). Does not include row copy or compare with WHERE clause.
- */
- double find_cost() const
+ /* Cost for just fetching and copying a row (no compare costs) */
+ double fetch_cost() const
{
- return IO_COEFF*io_count*avg_io_cost +
- IO_COEFF*idx_io_count*idx_avg_io_cost +
- CPU_COEFF*(cpu_cost + idx_cpu_cost) +
- MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost;
+ DBUG_ASSERT(avg_io_cost != 0.0 || index_cost.io + row_cost.io == 0);
+ return ((index_cost.io + row_cost.io) * avg_io_cost+
+ index_cost.cpu + row_cost.cpu + copy_cost);
}
/*
- Cost of comparing the row with the WHERE clause
+ Cost of copying the row or key to 'record'
*/
- inline double compare_cost() const
+ inline double data_copy_cost() const
{
- return CPU_COEFF*comp_cost;
+ return copy_cost;
}
/*
- Cost of copying the row or key to 'record'
+    Multiply costs to simulate a scan where we read the rows 'n' times.
+    We assume that io blocks will be cached and we only
+    allocate memory once. There should also be no import_cost
+    that needs to be done multiple times
*/
- inline double data_copy_cost() const
+ void multiply(uint n)
{
- return CPU_COEFF*copy_cost;
+ index_cost.io*= n;
+ index_cost.cpu*= n;
+ row_cost.io*= n;
+ row_cost.cpu*= n;
+ copy_cost*= n;
+ comp_cost*= n;
+ cpu_cost*= n;
}
- /* Cost of finding an index entry, without copying or comparing it */
- double index_only_cost()
+ void add(Cost_estimate *cost)
{
- return IO_COEFF*idx_io_count*idx_avg_io_cost +
- CPU_COEFF*idx_cpu_cost;
+ DBUG_ASSERT(cost->avg_io_cost != 0.0 || (index_cost.io + row_cost.io == 0));
+ avg_io_cost= cost->avg_io_cost;
+ index_cost.io+= cost->index_cost.io;
+ index_cost.cpu+= cost->index_cost.cpu;
+ row_cost.io+= cost->row_cost.io;
+ row_cost.cpu+= cost->row_cost.cpu;
+ copy_cost+= cost->copy_cost;
+ comp_cost+= cost->comp_cost;
+ cpu_cost+= cost->cpu_cost;
}
inline void reset()
{
- avg_io_cost= 1.0;
- idx_avg_io_cost= 1.0;
- io_count= idx_io_count= cpu_cost= idx_cpu_cost= mem_cost= import_cost= 0.0;
- comp_cost= copy_cost= limit_cost= 0.0;
- }
-
- void multiply(double m)
- {
- io_count *= m;
- cpu_cost *= m;
- idx_io_count *= m;
- idx_cpu_cost *= m;
- import_cost *= m;
- comp_cost *= m;
- limit_cost*= m;
- /* Don't multiply mem_cost */
- }
-
- void add(const Cost_estimate* cost)
- {
- if (cost->io_count != 0.0)
- {
- double io_count_sum= io_count + cost->io_count;
- avg_io_cost= (io_count * avg_io_cost +
- cost->io_count * cost->avg_io_cost)
- /io_count_sum;
- io_count= io_count_sum;
- }
- if (cost->idx_io_count != 0.0)
- {
- double idx_io_count_sum= idx_io_count + cost->idx_io_count;
- idx_avg_io_cost= (idx_io_count * idx_avg_io_cost +
- cost->idx_io_count * cost->idx_avg_io_cost)
- /idx_io_count_sum;
- idx_io_count= idx_io_count_sum;
- }
- cpu_cost += cost->cpu_cost;
- idx_cpu_cost += cost->idx_cpu_cost;
- import_cost += cost->import_cost;
- comp_cost+= cost->comp_cost;
- limit_cost+= cost->limit_cost;
- }
-
- void add_io(double add_io_cnt, double add_avg_cost)
- {
- /* In edge cases add_io_cnt may be zero */
- if (add_io_cnt > 0)
- {
- double io_count_sum= io_count + add_io_cnt;
- avg_io_cost= (io_count * avg_io_cost +
- add_io_cnt * add_avg_cost) / io_count_sum;
- io_count= io_count_sum;
- }
+ avg_io_cost= 0;
+ comp_cost= cpu_cost= 0.0;
+ copy_cost= limit_cost= 0.0;
+ index_cost= {0,0};
+ row_cost= {0,0};
}
+ inline void reset(handler *file);
/*
To be used when we go from old single value-based cost calculations to
@@ -2914,13 +2903,10 @@ public:
void convert_from_cost(double cost)
{
reset();
- io_count= cost;
+ cpu_cost= cost;
}
};
-void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
- Cost_estimate *cost);
-
/*
Indicates that all scanned ranges will be singlepoint (aka equality) ranges.
The ranges may not use the full key but all of them will use the same number
@@ -3094,20 +3080,6 @@ enum class Compare_keys : uint32_t
NotEqual
};
-/* Cost for reading a row through an index */
-struct INDEX_READ_COST
-{
- double read_cost;
- double index_only_cost;
-};
-
-/* Separated costs for IO and CPU. For handler::keyread_time() */
-struct IO_AND_CPU_COST
-{
- double io;
- double cpu;
-};
-
/**
The handler class is the interface for dynamically loadable
@@ -3183,6 +3155,7 @@ public:
HANDLER_BUFFER *multi_range_buffer; /* MRR buffer info */
uint ranges_in_seq; /* Total number of ranges in the traversed sequence */
/** Current range (the one we're now returning rows from) */
+
KEY_MULTI_RANGE mrr_cur_range;
/** The following are for read_range() */
@@ -3610,6 +3583,58 @@ public:
}
virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
+ inline double io_cost(IO_AND_CPU_COST cost)
+ {
+ return cost.io * DISK_READ_COST * DISK_READ_RATIO;
+ }
+
+ inline double cost(IO_AND_CPU_COST cost)
+ {
+ return io_cost(cost) + cost.cpu;
+ }
+
+ /*
+ Calculate cost with capping io_blocks to the given maximum.
+ This is done here instead of earlier to allow filtering to work
+    with the original io_block counts.
+ */
+ inline double cost(ALL_READ_COST *cost)
+ {
+ double blocks= (MY_MIN(cost->index_cost.io,(double) cost->max_index_blocks) +
+ MY_MIN(cost->row_cost.io, (double) cost->max_row_blocks));
+ return ((cost->index_cost.cpu + cost->row_cost.cpu + cost->copy_cost) +
+ blocks * DISK_READ_COST * DISK_READ_RATIO);
+ }
+
+ /*
+    Calculate cost when we are going to execute the given read method
+ multiple times
+ */
+ inline double cost_for_reading_multiple_times(double multiple,
+ ALL_READ_COST *cost)
+
+ {
+ double blocks= (MY_MIN(cost->index_cost.io * multiple,
+ (double) cost->max_index_blocks) +
+ MY_MIN(cost->row_cost.io * multiple,
+ (double) cost->max_row_blocks));
+ return ((cost->index_cost.cpu + cost->row_cost.cpu + cost->copy_cost) *
+ multiple +
+ blocks * DISK_READ_COST * DISK_READ_RATIO);
+ }
+
+ inline ulonglong row_blocks()
+ {
+ return (stats.data_file_length + IO_SIZE-1) / IO_SIZE;
+ }
+
+ virtual ulonglong index_blocks(uint index, uint ranges, ha_rows rows);
+
+ inline ulonglong index_blocks(uint index)
+ {
+ return index_blocks(index, 1, stats.records);
+ }
+
/*
Time for a full table data scan. To be overridden by engines, should not
be used by the sql level.
@@ -3619,7 +3644,7 @@ protected:
{
IO_AND_CPU_COST cost;
ulonglong length= stats.data_file_length;
- cost.io= (double) (length / IO_SIZE) * avg_io_cost();
+ cost.io= (double) (length / IO_SIZE);
cost.cpu= (!stats.block_size ? 0.0 :
(double) ((length + stats.block_size-1)/stats.block_size) *
INDEX_BLOCK_COPY_COST);
@@ -3639,27 +3664,23 @@ public:
a few rows and the extra cost has no practical effect.
*/
- inline double ha_scan_time(ha_rows rows)
+ inline IO_AND_CPU_COST ha_scan_time(ha_rows rows)
{
IO_AND_CPU_COST cost= scan_time();
- return (cost.io * DISK_READ_RATIO +
- cost.cpu + TABLE_SCAN_SETUP_COST +
- (double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
+ cost.cpu+= (TABLE_SCAN_SETUP_COST +
+ (double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
+ return cost;
}
/*
Time for a full table scan, fetching the rows from the table and comparing
the row with the where clause
*/
- inline double ha_scan_and_compare_time(ha_rows rows)
- {
- return ha_scan_time(rows) + (double) rows * WHERE_COST;
- }
-
- /* Cost of (random) reading a block of IO_SIZE */
- virtual double avg_io_cost()
+ inline IO_AND_CPU_COST ha_scan_and_compare_time(ha_rows rows)
{
- return DISK_READ_COST;
+ IO_AND_CPU_COST cost= ha_scan_time(rows);
+ cost.cpu+= (double) rows * WHERE_COST;
+ return cost;
}
/*
@@ -3684,7 +3705,7 @@ protected:
double r= rows2double(rows);
return
{
- r * avg_io_cost() * stats.block_size/IO_SIZE, // Blocks read
+      r * ((stats.block_size + IO_SIZE - 1) / IO_SIZE),  // Blocks read
r * INDEX_BLOCK_COPY_COST // Copy block from cache
};
}
@@ -3699,11 +3720,12 @@ public:
row).
*/
- inline double ha_rnd_pos_time(ha_rows rows)
+ inline IO_AND_CPU_COST ha_rnd_pos_time(ha_rows rows)
{
IO_AND_CPU_COST cost= rnd_pos_time(rows);
- return (cost.io * DISK_READ_RATIO +
- cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST));
+ set_if_smaller(cost.io, (double) row_blocks());
+ cost.cpu+= rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST);
+ return cost;
}
/*
@@ -3712,20 +3734,24 @@ public:
but that may change in the future after we do more cost checks for
more engines.
*/
- inline double ha_rnd_pos_call_time(ha_rows rows)
+ inline IO_AND_CPU_COST ha_rnd_pos_call_time(ha_rows rows)
{
IO_AND_CPU_COST cost= rnd_pos_time(rows);
- return (cost.io * DISK_READ_RATIO +
- cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST));
+ set_if_smaller(cost.io, (double) row_blocks());
+ cost.cpu+= rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST);
+ return cost;
}
- inline double ha_rnd_pos_call_and_compare_time(ha_rows rows)
+ inline IO_AND_CPU_COST ha_rnd_pos_call_and_compare_time(ha_rows rows)
{
- return (ha_rnd_pos_call_time(rows) + rows2double(rows) * WHERE_COST);
+ IO_AND_CPU_COST cost;
+ cost= ha_rnd_pos_call_time(rows);
+ cost.cpu+= rows2double(rows) * WHERE_COST;
+ return cost;
}
/**
- Calculate cost of 'index_only' scan for given index, a number of reanges
+ Calculate cost of 'index_only' scan for given index, a number of ranges
and number of records.
@param index Index to read
@@ -3742,25 +3768,30 @@ public:
Calculate cost of 'keyread' scan for given index and number of records
including fetching the key to the 'record' buffer.
*/
- double ha_keyread_time(uint index, ulong ranges, ha_rows rows,
- ulonglong blocks);
+ IO_AND_CPU_COST ha_keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks);
/* Same as above, but take into account copying the key to the SQL layer */
- inline double ha_keyread_and_copy_time(uint index, ulong ranges,
- ha_rows rows, ulonglong blocks)
+ inline IO_AND_CPU_COST ha_keyread_and_copy_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks)
{
- return (ha_keyread_time(index, ranges, rows, blocks) +
- (double) rows * KEY_COPY_COST);
+ IO_AND_CPU_COST cost= ha_keyread_time(index, ranges, rows, blocks);
+ cost.cpu+= (double) rows * KEY_COPY_COST;
+ return cost;
}
- inline double ha_keyread_and_compare_time(uint index, ulong ranges,
- ha_rows rows, ulonglong blocks)
+ inline IO_AND_CPU_COST ha_keyread_and_compare_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks)
{
- return (ha_keyread_time(index, ranges, rows, blocks) +
- (double) rows * (KEY_COPY_COST + WHERE_COST));
+ IO_AND_CPU_COST cost= ha_keyread_time(index, ranges, rows, blocks);
+ cost.cpu+= (double) rows * (KEY_COPY_COST + WHERE_COST);
+ return cost;
}
- double ha_keyread_clustered_and_copy_time(uint index, ulong ranges,
+ IO_AND_CPU_COST ha_keyread_clustered_time(uint index,
+ ulong ranges,
ha_rows rows,
ulonglong blocks);
/*
@@ -3776,21 +3807,23 @@ protected:
public:
/* Cost of doing a full index scan */
- inline double ha_key_scan_time(uint index, ha_rows rows)
+ inline IO_AND_CPU_COST ha_key_scan_time(uint index, ha_rows rows)
{
IO_AND_CPU_COST cost= key_scan_time(index, rows);
- return (cost.io * DISK_READ_RATIO +
- cost.cpu + INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST +
- (double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST));
+ cost.cpu+= (INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST +
+ (double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST));
+ return cost;
}
/*
Cost of doing a full index scan with record copy and compare
@param rows Rows from stat tables
*/
- inline double ha_key_scan_and_compare_time(uint index, ha_rows rows)
+ inline IO_AND_CPU_COST ha_key_scan_and_compare_time(uint index, ha_rows rows)
{
- return ha_key_scan_time(index, rows) + (double) rows * WHERE_COST;
+ IO_AND_CPU_COST cost= ha_key_scan_time(index, rows);
+ cost.cpu+= (double) rows * WHERE_COST;
+ return cost;
}
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
@@ -5602,4 +5635,10 @@ uint ha_count_rw_2pc(THD *thd, bool all);
uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
bool all);
+inline void Cost_estimate::reset(handler *file)
+{
+ reset();
+ avg_io_cost= file->DISK_READ_COST * file->DISK_READ_RATIO;
+}
+
#endif /* HANDLER_INCLUDED */
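A usage sketch of the capping defined above (all values invented; 'file'
stands for any handler *): a covering eq_ref key on a table whose whole
index spans only 10 IO blocks.

  ALL_READ_COST c;
  c.reset();
  c.index_cost= {1.0, 0.002};  /* ~1 candidate block + some CPU per probe */
  c.copy_cost= 0.001;
  c.max_index_blocks= 10;      /* file->index_blocks(key): whole index */
  c.max_row_blocks= 0;         /* covering key: no row fetch */

  /*
    For 1e6 row combinations the CPU part scales with 1e6, but the IO part
    is MY_MIN(1.0 * 1e6, 10) = 10 blocks, where the old code would have
    charged ~1e6 disk reads and pushed the optimizer towards table scan +
    join cache.
  */
  double total= file->cost_for_reading_multiple_times(1e6, &c);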
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index e4fd1e75176..a71667ab9fe 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -22,6 +22,9 @@
#include "rowid_filter.h"
#include "optimizer_defaults.h"
+static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
+ Cost_estimate *cost);
+
/* The following calculation is the same as in multi_range_read_info() */
@@ -32,34 +35,35 @@ void handler::calculate_costs(Cost_estimate *cost, uint keyno,
ulonglong io_blocks,
ulonglong unassigned_single_point_ranges)
{
- double key_cost;
- cost->reset();
- cost->avg_io_cost= cost->idx_avg_io_cost= 0; // Not used!
+ cost->reset(this);
if (!is_clustering_key(keyno))
{
- key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
- cost->idx_cpu_cost= key_cost;
+ cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
if (!(flags & HA_MRR_INDEX_ONLY))
{
/* ha_rnd_pos_time includes ROW_COPY_COST */
- cost->cpu_cost= ha_rnd_pos_time(total_rows);
+ cost->row_cost= ha_rnd_pos_time(total_rows);
+ /* Adjust io cost to data size */
+ cost->row_cost.io= MY_MIN(cost->row_cost.io, row_blocks());
}
else
{
/* Index only read */
- cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
+ cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
}
}
else
{
/* Clustered index */
- io_blocks+= unassigned_single_point_ranges;
- key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
- cost->idx_cpu_cost= key_cost;
- cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
+ io_blocks= unassigned_single_point_ranges;
+ cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
+ cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
}
+ /* Adjust io cost to data size */
+ cost->index_cost.io= MY_MIN(cost->index_cost.io, index_blocks(keyno));
+
cost->comp_cost= (rows2double(total_rows) * WHERE_COST +
MULTI_RANGE_READ_SETUP_COST);
}
@@ -357,7 +361,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
{
/*
Calculate what the cost would be if we only have to read 'top_limit'
- rows. This is the lowest possible cost fwhen using the range
+ rows. This is the lowest possible cost when using the range
when we find the 'accepted rows' at once.
*/
Cost_estimate limit_cost;
@@ -365,16 +369,14 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
io_blocks, unassigned_single_point_ranges);
cost->limit_cost= limit_cost.total_cost();
}
+ DBUG_PRINT("statistics",
+ ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu "
+ "cpu_cost: %.3f",
+ table->s->keynames.type_names[keyno],
+ (ulonglong) total_rows, cost->total_cost(),
+ (ulonglong) (cost->row_cost.io + cost->index_cost.io),
+ (double) (cost->row_cost.cpu + cost->index_cost.cpu)));
}
- DBUG_PRINT("statistics",
- ("key: %s rows: %llu total_cost: %.3f io_blocks: %llu "
- "idx_io_count: %.3f cpu_cost: %.3f io_count: %.3f "
- "compare_cost: %.3f",
- table->s->keynames.type_names[keyno],
- (ulonglong) total_rows, cost->total_cost(),
- (ulonglong) io_blocks,
- cost->idx_io_count, cost->cpu_cost, cost->io_count,
- cost->comp_cost));
DBUG_RETURN(total_rows);
}
@@ -413,7 +415,8 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
other Error or can't perform the requested scan
*/
-ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_rows,
+ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges,
+ uint total_rows,
uint key_parts, uint *bufsz,
uint *flags, Cost_estimate *cost)
{
@@ -426,17 +429,17 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_row
*bufsz= 0; /* Default implementation doesn't need a buffer */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
- cost->reset();
+ cost->reset(this);
+
/* Produce the same cost as non-MRR code does */
if (!is_clustering_key(keyno))
{
- double key_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0);
- cost->idx_cpu_cost= key_cost;
+ cost->index_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0);
if (!(*flags & HA_MRR_INDEX_ONLY))
{
/* ha_rnd_pos_time includes ROW_COPY_COST */
- cost->cpu_cost= ha_rnd_pos_time(total_rows);
+ cost->row_cost= ha_rnd_pos_time(total_rows);
}
else
{
@@ -447,7 +450,8 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_row
else
{
/* Clustering key */
- cost->cpu_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0);
+ cost->index_cost= ha_keyread_clustered_time(keyno, n_ranges, total_rows,
+ 0);
cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
}
cost->comp_cost= rows2double(total_rows) * WHERE_COST;
@@ -1966,7 +1970,8 @@ int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
}
-static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost);
+static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows,
+ Cost_estimate *cost);
/**
@@ -1997,7 +2002,6 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
ha_rows rows_in_full_step;
ha_rows rows_in_last_step;
uint n_full_steps;
- double index_read_cost;
elem_size= primary_file->ref_length +
sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
@@ -2030,6 +2034,8 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
rows_in_full_step= max_buff_entries;
rows_in_last_step= rows % max_buff_entries;
+ cost->reset(primary_file);
+
/* Adjust buffer size if we expect to use only part of the buffer */
if (n_full_steps)
{
@@ -2038,7 +2044,6 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
}
else
{
- cost->reset();
*buffer_size= ((uint) MY_MAX(*buffer_size,
(size_t)(1.2*rows_in_last_step) * elem_size +
primary_file->ref_length +
@@ -2046,17 +2051,12 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
}
Cost_estimate last_step_cost;
+ last_step_cost.avg_io_cost= cost->avg_io_cost;
get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
cost->add(&last_step_cost);
- if (n_full_steps != 0)
- cost->mem_cost= *buffer_size;
- else
- cost->mem_cost= (double)rows_in_last_step * elem_size;
-
/* Total cost of all index accesses */
- index_read_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0);
- cost->add_io(index_read_cost, 1 /* Random seeks */);
+ cost->index_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0);
return FALSE;
}
@@ -2085,8 +2085,6 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
cmp_op= 3;
cost->cpu_cost += cmp_op * log2(cmp_op);
}
- else
- cost->reset();
}
@@ -2100,14 +2098,13 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
@param cost OUT The cost.
*/
-void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
- Cost_estimate *cost)
+static void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
+ Cost_estimate *cost)
{
DBUG_ENTER("get_sweep_read_cost");
- cost->reset();
#ifndef OLD_SWEEP_COST
- cost->cpu_cost= table->file->ha_rnd_pos_call_time(nrows);
+ cost->row_cost= table->file->ha_rnd_pos_call_time(nrows);
#else
if (table->file->pk_is_clustering_key(table->s->primary_key))
{
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index b2e109a5a72..215fe603623 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2713,6 +2713,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
TABLE_READ_PLAN *best_trp= NULL;
SEL_ARG **backup_keys= 0;
ha_rows table_records= head->stat_records();
+ handler *file= head->file;
/* We trust that if stat_records() is 0 the table is really empty! */
bool impossible_range= table_records == 0;
DBUG_ENTER("SQL_SELECT::test_quick_select");
@@ -2732,14 +2733,14 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
DBUG_RETURN(0);
records= table_records;
notnull_cond= head->notnull_cond;
- if (head->file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
+ if (file->ha_table_flags() & HA_NON_COMPARABLE_ROWID)
only_single_index_range_scan= 1;
if (head->force_index || force_quick_range)
read_time= DBL_MAX;
else
{
- read_time= head->file->ha_scan_and_compare_time(records);
+ read_time= file->cost(file->ha_scan_and_compare_time(records));
if (limit < records)
notnull_cond= NULL;
}
@@ -2775,7 +2776,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
/* set up parameter that is passed to all functions */
param.thd= thd;
- param.baseflag= head->file->ha_table_flags();
+ param.baseflag= file->ha_table_flags();
param.prev_tables=prev_tables | const_tables;
param.read_tables=read_tables;
param.current_table= head->map;
@@ -2884,8 +2885,9 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
{
double key_read_time;
uint key_for_use= find_shortest_key(head, &head->covering_keys);
- key_read_time= head->file->ha_key_scan_and_compare_time(key_for_use,
- records);
+ key_read_time= file->cost(file->
+ ha_key_scan_and_compare_time(key_for_use,
+ records));
DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
"read time %g", key_for_use, key_read_time));
@@ -5095,9 +5097,15 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records,
{
DBUG_ENTER("get_sweep_read_cost");
#ifndef OLD_SWEEP_COST
- double cost= (param->table->file->ha_rnd_pos_call_time(records) +
- (add_time_for_compare ?
- records * param->thd->variables.optimizer_where_cost : 0));
+ handler *file= param->table->file;
+ IO_AND_CPU_COST engine_cost= file->ha_rnd_pos_call_time(records);
+ double cost;
+ if (add_time_for_compare)
+ {
+ engine_cost.cpu+= records * param->thd->variables.optimizer_where_cost;
+ }
+ cost= file->cost(engine_cost);
+
DBUG_PRINT("return", ("cost: %g", cost));
DBUG_RETURN(cost);
#else
@@ -5481,9 +5489,9 @@ skip_to_ror_scan:
double cost;
if ((*cur_child)->is_ror)
{
- /* Ok, we have index_only cost, now get full rows lokoup cost */
- cost= param->table->file->
- ha_rnd_pos_call_and_compare_time((*cur_child)->records);
+ handler *file= param->table->file;
+ /* Ok, we have index_only cost, now get full rows scan cost */
+ cost= file->cost(file->ha_rnd_pos_call_and_compare_time((*cur_child)->records));
}
else
cost= read_time;
@@ -6681,6 +6689,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
ROR_SCAN_INFO *ror_scan;
my_bitmap_map *bitmap_buf;
uint keynr;
+ handler *file= param->table->file;
DBUG_ENTER("make_ror_scan");
if (!(ror_scan= (ROR_SCAN_INFO*)alloc_root(param->mem_root,
@@ -6690,7 +6699,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
ror_scan->idx= idx;
ror_scan->keynr= keynr= param->real_keynr[idx];
ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
- param->table->file->ref_length);
+ file->ref_length);
ror_scan->sel_arg= sel_arg;
ror_scan->records= param->quick_rows[keynr];
@@ -6717,8 +6726,8 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
ror queue.
*/
ror_scan->index_read_cost=
- param->table->file->ha_keyread_and_copy_time(ror_scan->keynr, 1,
- ror_scan->records, 0);
+ file->cost(file->ha_keyread_and_copy_time(ror_scan->keynr, 1,
+ ror_scan->records, 0));
DBUG_RETURN(ror_scan);
}
@@ -7664,8 +7673,8 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
Json_writer_object trace_idx(thd);
trace_idx.add("index", param->table->key_info[keynr].name);
- found_records= check_quick_select(param, idx, limit, read_index_only, key,
- for_range_access, &mrr_flags,
+ found_records= check_quick_select(param, idx, limit, read_index_only,
+ key, for_range_access, &mrr_flags,
&buf_size, &cost, &is_ror_scan);
if (found_records == HA_POS_ERROR ||
@@ -11868,22 +11877,10 @@ ha_rows check_quick_select(PARAM *param, uint idx, ha_rows limit,
rows) :
1.0); // ok as rows is 0
range->rows= rows;
- /* cost of finding a row without copy or checking the where */
- range->find_cost= cost->find_cost();
- /* cost of finding a row copying it to the row buffer */
- range->fetch_cost= range->find_cost + cost->data_copy_cost();
- /* Add comparing it to the where. Same as cost.total_cost() */
- range->cost= (range->fetch_cost + cost->compare_cost());
- /* Calculate the cost of just finding the key. Used by filtering */
- if (param->table->file->is_clustering_key(keynr))
- range->index_only_cost= range->find_cost;
- else
- {
- range->index_only_cost= cost->index_only_cost();
- DBUG_ASSERT(!(*mrr_flags & HA_MRR_INDEX_ONLY) ||
- range->index_only_cost ==
- range->find_cost);
- }
+ range->cost= *cost;
+ range->max_index_blocks= file->index_blocks(keynr, range->ranges,
+ rows);
+      range->max_row_blocks= MY_MIN(file->row_blocks(),
+                                    rows * file->stats.block_size / IO_SIZE);
range->first_key_part_has_only_one_value=
check_if_first_key_part_has_only_one_value(tree);
}
@@ -15120,8 +15117,8 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
1);
if (keys_per_group == 0) /* If there is no statistics try to guess */
{
- /* each group contains 1% of all records */
- keys_per_group= (records / 100) + 1;
+ /* each group contains 10% of all records */
+ keys_per_group= (records / 10) + 1;
}
}
if (keys_per_group > 1)
@@ -15168,12 +15165,11 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
}
DBUG_ASSERT(num_groups <= records);
-
/* Calculate the number of blocks we will touch for the table or range scan */
num_blocks= (records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV *
INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1;
- io_cost= (have_max) ? num_groups*2 : num_groups;
+ io_cost= (have_max) ? num_groups * 2 : num_groups;
set_if_smaller(io_cost, num_blocks);
/*
@@ -15184,9 +15180,10 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
group.
*/
uint keyno= (uint) (index_info - table->key_info);
- *read_cost= file->ha_keyread_and_compare_time(keyno, (ulong) num_groups,
- num_groups,
- io_cost);
+ *read_cost= file->cost(file->ha_keyread_and_compare_time(keyno,
+ (ulong) num_groups,
+ num_groups,
+ io_cost));
*out_records= num_groups;
DBUG_PRINT("info",
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 93459c49c23..f58410ccb15 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -1472,7 +1472,8 @@ void get_delayed_table_estimates(TABLE *table,
hash_sj_engine->tmp_table->s->reclength);
/* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */
- *scan_time= ((data_size/IO_SIZE * table->file->avg_io_cost()) +
+ *scan_time= ((data_size/IO_SIZE * table->file->DISK_READ_COST *
+ table->file->DISK_READ_RATIO) +
*out_rows * file->ROW_COPY_COST);
}
@@ -2521,7 +2522,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
&subjoin_read_time,
&subjoin_out_rows);
- sjm->materialization_cost.convert_from_cost(subjoin_read_time);
+    sjm->materialization_cost= subjoin_read_time;
sjm->rows_with_duplicates= sjm->rows= subjoin_out_rows;
/*
@@ -2586,8 +2587,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
temporary table. Note that smj->materialization_cost already includes
row copy and compare costs of finding the original row.
*/
- sjm->materialization_cost.add_io(subjoin_out_rows, cost.write);
- sjm->materialization_cost.copy_cost+= cost.create;
+    sjm->materialization_cost+= subjoin_out_rows * cost.write + cost.create;
/*
Set the cost to do a full scan of the temptable (will need this to
@@ -2600,10 +2600,10 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost +
TABLE_SCAN_SETUP_COST_THD(thd) +
row_copy_cost * sjm->rows);
- sjm->scan_cost.convert_from_cost(total_cost);
+    sjm->scan_cost= total_cost;
/* When reading a row, we have also to check the where clause */
- sjm->lookup_cost.convert_from_cost(cost.lookup + WHERE_COST_THD(thd));
+ sjm->lookup_cost= cost.lookup + WHERE_COST_THD(thd);
sj_nest->sj_mat_info= sjm;
DBUG_EXECUTE("opt", print_sjm(sjm););
}
@@ -3183,9 +3183,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
mat_read_time=
COST_ADD(prefix_cost,
- COST_ADD(mat_info->materialization_cost.total_cost(),
+ COST_ADD(mat_info->materialization_cost,
COST_MULT(prefix_rec_count,
- mat_info->lookup_cost.total_cost())));
+ mat_info->lookup_cost)));
/*
NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
@@ -3235,9 +3235,9 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
/* Add materialization cost */
prefix_cost=
COST_ADD(prefix_cost,
- COST_ADD(mat_info->materialization_cost.total_cost(),
+ COST_ADD(mat_info->materialization_cost,
COST_MULT(prefix_rec_count,
- mat_info->scan_cost.total_cost())));
+ mat_info->scan_cost)));
prefix_rec_count= COST_MULT(prefix_rec_count, mat_info->rows);
uint i;
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
index b0053d3db14..2eb5ff56b02 100644
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@@ -228,15 +228,16 @@ public:
{
double records, read_time;
part1_conds_met= TRUE;
+ handler *file= s->table->file;
DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
/* Calculate the cost of complete loose index scan. */
- records= rows2double(s->table->file->stats.records);
+ records= rows2double(file->stats.records);
/* The cost is entire index scan cost (divided by 2) */
- read_time= s->table->file->ha_keyread_and_copy_time(key, 1,
- (ha_rows) records,
- 0);
+ read_time= file->cost(file->ha_keyread_and_copy_time(key, 1,
+ (ha_rows) records,
+ 0));
/*
Now find out how many different keys we will get (for now we
diff --git a/sql/optimizer_defaults.h b/sql/optimizer_defaults.h
index 8d74bb91cc3..26c54464219 100644
--- a/sql/optimizer_defaults.h
+++ b/sql/optimizer_defaults.h
@@ -175,7 +175,7 @@
#define TMPFILE_CREATE_COST 0.5 // Cost of creating and deleting files
#define HEAP_TEMPTABLE_CREATE_COST 0.025 // ms
/* Cost taken from HEAP_LOOKUP_COST in ha_heap.cc */
-#define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097*1000 + heap_optimizer_costs.row_copy_cost)
+#define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097)
#define DISK_TEMPTABLE_LOOKUP_COST(thd) (tmp_table_optimizer_costs.key_lookup_cost + tmp_table_optimizer_costs.row_lookup_cost + tmp_table_optimizer_costs.row_copy_cost)
#define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 // 2 tmp tables
#define DISK_TEMPTABLE_BLOCK_SIZE IO_SIZE
diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc
index 4f713edb47f..4d6bf4ea98c 100644
--- a/sql/rowid_filter.cc
+++ b/sql/rowid_filter.cc
@@ -485,12 +485,9 @@ void Range_rowid_filter_cost_info::trace_info(THD *thd)
*/
Range_rowid_filter_cost_info *
-TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
- double records,
- double fetch_cost,
- double index_only_cost,
- double prev_records,
- double *records_out)
+TABLE::best_range_rowid_filter(uint access_key_no, double records,
+ double fetch_cost, double index_only_cost,
+ double prev_records, double *records_out)
{
if (range_rowid_filter_cost_info_elems == 0 ||
covering_keys.is_set(access_key_no))
diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h
index 91ffd8c065a..f761e1220aa 100644
--- a/sql/rowid_filter.h
+++ b/sql/rowid_filter.h
@@ -472,18 +472,20 @@ public:
friend
void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd);
+  /* Best range rowid filter for a partial join */
friend
Range_rowid_filter_cost_info *
- TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no,
- double records,
- double fetch_cost,
- double index_only_cost,
- double prev_records,
- double *records_out);
+ TABLE::best_range_rowid_filter(uint access_key_no,
+ double records,
+ double fetch_cost,
+ double index_only_cost,
+ double prev_records,
+ double *records_out);
Range_rowid_filter_cost_info *
- apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
- double *startup_cost, double fetch_cost,
- double index_only_cost, uint ranges, double record_count);
+ apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost,
+ double *records_arg,
+ double *startup_cost,
+ uint ranges, double record_count);
};
#endif /* ROWID_FILTER_INCLUDED */
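Schematically, the reworked apply_filter() (see the sql_select.cc hunks
below) adjusts an ALL_READ_COST in place instead of juggling separate
doubles; the WHERE and filter startup terms are omitted here, and
'selectivity', 'records', 'prev_records' and 'filter_lookup_cost' are
placeholders for the values computed in that function:

  ALL_READ_COST adjusted= *cost;       /* cost of fetching all rows */
  adjusted.row_cost.io*= selectivity;  /* only matching rows are fetched */
  adjusted.row_cost.cpu*= selectivity;
  adjusted.copy_cost*= selectivity;
  adjusted.index_cost.cpu+= filter_lookup_cost; /* records * lookup_cost() */

  /* Keep the filter only if it is cheaper over all row combinations */
  bool use_filter=
    file->cost_for_reading_multiple_times(prev_records, &adjusted) <
    file->cost_for_reading_multiple_times(prev_records, cost);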
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 6cdb553629c..69907208dec 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -6814,13 +6814,13 @@ public:
/*
Cost to materialize - execute the sub-join and write rows into temp.table
*/
- Cost_estimate materialization_cost;
+ double materialization_cost;
/* Cost to make one lookup in the temptable */
- Cost_estimate lookup_cost;
+ double lookup_cost;
/* Cost of scanning the materialized table */
- Cost_estimate scan_cost;
+ double scan_cost;
/* --- Execution structures ---------- */
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index e92f22ebc4a..61f609aa592 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5445,6 +5445,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
{
set_position(join,const_count++,s,(KEYUSE*) 0);
no_rows_const_tables |= table->map;
+ table->file->stats.records= 0;
}
}
@@ -7861,42 +7862,49 @@ static double matching_candidates_in_table(JOIN_TAB *s,
WHERE_COST cost is not added to any result.
*/
-INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
- uint key,
- ha_rows records, ha_rows worst_seeks)
+ALL_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
+ uint key, ha_rows records,
+ ha_rows worst_seeks)
{
- INDEX_READ_COST cost;
+ ALL_READ_COST cost;
handler *file= table->file;
- double rows_adjusted;
+ ha_rows max_seeks;
DBUG_ENTER("cost_for_index_read");
- rows_adjusted= MY_MIN(rows2double(records), (double) thd->variables.max_seeks_for_key);
- set_if_bigger(rows_adjusted, 1);
+ max_seeks= (ha_rows) thd->variables.max_seeks_for_key;
+ set_if_bigger(records, 1);
-#ifdef OLD_CODE_LIMITED_SEEKS
- set_if_smaller(rows_adjusted, worst_seeks);
-#endif
if (file->is_clustering_key(key))
{
- cost.index_only_cost=
- file->ha_keyread_clustered_and_copy_time(key, 1, rows_adjusted, 0);
+ cost.index_cost=
+ file->ha_keyread_clustered_time(key, 1, records, 0);
+ cost.copy_cost= rows2double(records) * file->ROW_COPY_COST;
/* There is no 'index_only_read' with a clustered index */
- cost.read_cost= cost.index_only_cost;
+ cost.row_cost= {0,0};
+    /* Capping of index_blocks will happen in handler::cost() */
+ cost.max_index_blocks= MY_MIN(file->row_blocks(), max_seeks);
+ cost.max_row_blocks= 0;
}
else if (table->covering_keys.is_set(key) && !table->no_keyread)
{
- cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0);
- /* Same computation as in ha_keyread_and_copy_time() */
- cost.read_cost= (cost.index_only_cost +
- rows2double(records) * file->KEY_COPY_COST);
+ cost.index_cost= file->ha_keyread_time(key, 1, records, 0);
+ cost.row_cost= {0,0};
+ cost.copy_cost= rows2double(records) * file->KEY_COPY_COST;
+ cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks);
+ cost.max_row_blocks= 0;
}
else
{
- cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0);
- cost.read_cost= (cost.index_only_cost + file->ha_rnd_pos_time(records));
+ cost.index_cost= file->ha_keyread_time(key, 1, records, 0);
+ /* ha_rnd_pos_time() includes time for copying the row */
+ cost.row_cost= file->ha_rnd_pos_time(records);
+ cost.max_index_blocks= MY_MIN(file->index_blocks(key), max_seeks);
+ cost.max_row_blocks= MY_MIN(file->row_blocks(), max_seeks);
+ cost.copy_cost= 0;
}
- DBUG_PRINT("statistics", ("index_cost: %.3f full_cost: %.3f",
- cost.index_only_cost, cost.read_cost));
+ DBUG_PRINT("statistics", ("index_cost: %.3f row_cost: %.3f",
+ file->cost(cost.index_cost),
+ file->cost(cost.row_cost)));
DBUG_RETURN(cost);
}
@@ -7906,14 +7914,15 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
@param thd Thread handler
@param table Table
- @param cost Pointer to cost for *records_arg rows, not including
- WHERE_COST cost.
+  @param cost            Pointer to the current access cost, which does not
+                         include the WHERE_COST.
Will be updated to new cost if filter is used.
@param records_arg Pointer to number of records for the current key.
Will be updated to records after filter, if filter is
used.
@param startup_cost Startup cost. Will be updated if filter is used.
- @param fetch_cost Cost of finding the row, without copy or compare cost
+  @param fetch_cost      Cost of finding the row, without the WHERE compare cost
@param index_only_cost Cost if fetching '*records_arg' key values
@param prev_records Number of record combinations in previous tables
@@ -7922,16 +7931,18 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
*/
Range_rowid_filter_cost_info* Range_rowid_filter_cost_info::
-apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
- double *startup_cost, double fetch_cost, double index_only_cost,
+apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost,
+ double *records_arg,
+ double *startup_cost,
uint ranges, double prev_records)
{
+ handler *file= table->file;
bool use_filter;
- double new_cost, new_total_cost, records= *records_arg, new_records;
- double cost_of_accepted_rows, cost_of_rejected_rows;
+ double new_cost, org_cost, records= *records_arg, new_records;
double filter_startup_cost= get_setup_cost();
- double io_cost= table->file->avg_io_cost();
double filter_lookup_cost= records * lookup_cost();
+ double tmp;
+ ALL_READ_COST adjusted_cost;
/*
Calculate number of resulting rows after filtering
@@ -7955,42 +7966,50 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
The io_cost is used to take into account that we have to do 1 key
lookup to find the first matching key in each range.
*/
- cost_of_accepted_rows= fetch_cost * selectivity;
- cost_of_rejected_rows= index_only_cost * (1-selectivity);
- /*
- The MAX() is used below to ensure that we take into account the index
- read even if selectivity (and thus new_records) would be very low.
- */
- new_cost= (MY_MAX(cost_of_accepted_rows,
- ranges * table->file->KEY_LOOKUP_COST +
- ranges * io_cost * table->file->DISK_READ_RATIO) +
- cost_of_rejected_rows + filter_lookup_cost);
- new_total_cost= ((new_cost + new_records * WHERE_COST_THD(thd)) *
- prev_records + filter_startup_cost);
+
+ adjusted_cost= *cost;
+  /* With the filter we only fetch a 'selectivity' fraction of the rows */
+ adjusted_cost.row_cost.io*= selectivity;
+ adjusted_cost.row_cost.cpu*= selectivity;
+ adjusted_cost.copy_cost*= selectivity;
+ adjusted_cost.index_cost.cpu+= filter_lookup_cost;
+
+ tmp= prev_records * WHERE_COST_THD(thd);
+ org_cost= (file->cost_for_reading_multiple_times(prev_records,
+ cost) +
+ records * tmp);
+
+ new_cost= (file->cost_for_reading_multiple_times(prev_records,
+ &adjusted_cost) +
+ new_records * tmp + filter_startup_cost);
DBUG_ASSERT(new_cost >= 0 && new_records >= 0);
- use_filter= ((*cost + records * WHERE_COST_THD(thd)) * prev_records >
- new_total_cost);
+ use_filter= new_cost < org_cost;
if (unlikely(thd->trace_started()))
{
Json_writer_object trace_filter(thd, "filter");
trace_filter.add("rowid_filter_key",
table->key_info[get_key_no()].name).
- add("index_only_cost", index_only_cost).
+ add("index_only_cost", file->cost(cost->index_cost)).
add("filter_startup_cost", filter_startup_cost).
add("find_key_and_filter_lookup_cost", filter_lookup_cost).
add("filter_selectivity", selectivity).
- add("orginal_rows", records).
- add("new_rows", new_records).
- add("original_found_rows_cost", fetch_cost).
- add("new_found_rows_cost", new_cost).
- add("cost", new_total_cost).
+ add("original_rows", records).
+ add("new_rows", new_records).
+ add("original_access_cost", file->cost(cost)).
+ add("with_filter_access_cost", file->cost(&adjusted_cost)).
+ add("original_found_rows_cost", file->cost(cost->row_cost)).
+ add("with_filter_found_rows_cost", file->cost(adjusted_cost.row_cost)).
+ add("org_cost", org_cost).
+ add("filter_cost", new_cost).
add("filter_used", use_filter);
}
if (use_filter)
{
- *cost= new_cost;
+ cost->row_cost= adjusted_cost.row_cost;
+ cost->index_cost= adjusted_cost.index_cost;
+ cost->copy_cost= adjusted_cost.copy_cost;
*records_arg= new_records;
(*startup_cost)+= filter_startup_cost;
return this;
@@ -8060,6 +8079,7 @@ best_access_path(JOIN *join,
uint use_cond_selectivity=
thd->variables.optimizer_use_condition_selectivity;
TABLE *table= s->table;
+ handler *file= table->file;
my_bool found_constraint= 0;
/*
key_dependent is 0 if all key parts could be used or if there was an
@@ -8068,7 +8088,7 @@ best_access_path(JOIN *join,
Otherwise it's a bitmap of tables that could improve key usage.
*/
table_map key_dependent= 0;
- double tmp;
+ ALL_READ_COST tmp;
ha_rows rec;
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
KEYUSE *hj_start_key= 0;
@@ -8112,9 +8132,9 @@ best_access_path(JOIN *join,
if (s->keyuse)
{ /* Use key if possible */
KEYUSE *keyuse, *start_key= 0;
- double index_only_cost= DBL_MAX;
uint max_key_part=0;
enum join_type type= JT_UNKNOWN;
+ double cur_cost;
/* Test how we can use keys */
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
@@ -8245,16 +8265,26 @@ best_access_path(JOIN *join,
if (ft_key)
{
/*
- Calculate an adjusted cost based on how many records are read
- This will be later multipled by record_count.
+          Fulltext indexes are processed the following way:
+          - In the prepare step it performs the search, collects all
+            positions in an array and sorts it.
+          - If the optimizer decides to use the ft index access method, it
+            simply returns positions from the array one by one.
+          - If the optimizer decides to use something else (another index,
+            table scan), then it'll use binary search in the array to find
+            the position.
+
+          The following code puts the cost down to very small as the prepare
+          step will always be done and the cost to fetch the row from memory
+          is very small.
+          Alternatively we could use the cost of an EQ_REF here.
*/
- tmp= (prev_record_reads(join_positions, idx, found_ref) /
- record_count);
- set_if_smaller(tmp, 1.0);
- index_only_cost= tmp;
+ tmp.reset();
+ tmp.row_cost.cpu= file->ROW_COPY_COST;
/*
- Really, there should be records=0.0 (yes!)
- but 1.0 would be probably safer
+ We don't know how many records will match. However, we want to have
+ the fulltext search done early, so we put the number of records
+ to be very low.
*/
records= 1.0;
type= JT_FT;
@@ -8293,25 +8323,21 @@ best_access_path(JOIN *join,
if (!found_ref && table->opt_range_keys.is_set(key))
{
/* Ensure that the cost is identical to the range cost */
- tmp= table->opt_range[key].fetch_cost;
- index_only_cost= table->opt_range[key].index_only_cost;
+ table->opt_range[key].get_costs(&tmp);
}
else
{
- INDEX_READ_COST cost= cost_for_index_read(thd, table, key,
- 1,1);
- tmp= cost.read_cost;
- index_only_cost= cost.index_only_cost;
+ tmp= cost_for_index_read(thd, table, key, 1, 1);
}
/*
Calculate an adjusted cost based on how many records are read
- This will be later multipled by record_count.
+          This will be multiplied by record_count.
*/
adjusted_cost= (prev_record_reads(join_positions, idx, found_ref) /
record_count);
set_if_smaller(adjusted_cost, 1.0);
- tmp*= adjusted_cost;
- index_only_cost*= adjusted_cost;
+ tmp.row_cost.cpu*= adjusted_cost;
+ tmp.index_cost.cpu*= adjusted_cost;
records= 1.0;
}
else
@@ -8345,8 +8371,8 @@ best_access_path(JOIN *join,
/* Ensure that the cost is identical to the range cost */
records= (double) table->opt_range[key].rows;
trace_access_idx.add("used_range_estimates", true);
- tmp= table->opt_range[key].fetch_cost;
- index_only_cost= table->opt_range[key].index_only_cost;
+
+ table->opt_range[key].get_costs(&tmp);
goto got_cost2;
}
/* quick_range couldn't use key! */
@@ -8408,16 +8434,14 @@ best_access_path(JOIN *join,
}
}
/* Calculate the cost of the index access */
- INDEX_READ_COST cost=
- cost_for_index_read(thd, table, key,
- (ha_rows) records,
- (ha_rows) s->worst_seeks);
- tmp= cost.read_cost;
- index_only_cost= cost.index_only_cost;
+ tmp= cost_for_index_read(thd, table, key,
+ (ha_rows) records,
+ (ha_rows) s->worst_seeks);
}
}
else
{
+ ha_rows tmp_records;
type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF;
if (unlikely(trace_access_idx.trace_started()))
trace_access_idx.
@@ -8430,7 +8454,7 @@ best_access_path(JOIN *join,
records.
*/
if ((found_part & 1) &&
- (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
+ (!(file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
{
double extra_cost= 0;
@@ -8480,8 +8504,7 @@ best_access_path(JOIN *join,
table->opt_range[key].ranges == 1 + MY_TEST(ref_or_null_part)) //(C3)
{
records= (double) table->opt_range[key].rows;
- tmp= table->opt_range[key].fetch_cost;
- index_only_cost= table->opt_range[key].index_only_cost;
+ table->opt_range[key].get_costs(&tmp);
/*
TODO: Disable opt_range testing below for this range as we can
always use this ref instead.
@@ -8599,13 +8622,12 @@ best_access_path(JOIN *join,
/* Limit the number of matched rows */
set_if_smaller(records, (double) s->records);
- tmp= records;
- set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
- INDEX_READ_COST cost= cost_for_index_read(thd, table, key,
- (ha_rows) tmp,
- (ha_rows) s->worst_seeks);
- tmp= cost.read_cost;
- index_only_cost= cost.index_only_cost+extra_cost;
+ tmp_records= records;
+ set_if_smaller(tmp_records, thd->variables.max_seeks_for_key);
+ tmp= cost_for_index_read(thd, table, key,
+ tmp_records,
+ (ha_rows) s->worst_seeks);
+ tmp.copy_cost+= extra_cost;
}
else
{
@@ -8620,7 +8642,7 @@ best_access_path(JOIN *join,
got_cost2:
loose_scan_opt.check_ref_access_part2(key, start_key, records,
- tmp + startup_cost,
+ file->cost(&tmp) + startup_cost,
found_ref);
} /* not ft_key */
@@ -8630,14 +8652,13 @@ best_access_path(JOIN *join,
records_best_filter= records_after_filter= records;
/*
- Check that start_key->key can be used for index access
+ Check if we can use a filter.
Records can be 0 in case of empty tables.
*/
if ((found_part & 1) && records &&
(table->file->index_flags(start_key->key,0,1) &
HA_DO_RANGE_FILTER_PUSHDOWN))
{
-
/*
        If we use filter F with selectivity s then the cost of fetching data
by key using this filter will be
@@ -8683,38 +8704,39 @@ best_access_path(JOIN *join,
number of rows from prev_record_read() and keyread_tmp is 0. These
numbers are not usable with rowid filter code.
*/
- filter=
- table->best_range_rowid_filter_for_partial_join(start_key->key,
- records,
- tmp,
- index_only_cost,
- record_count,
- &records_best_filter);
+ filter= table->best_range_rowid_filter(start_key->key,
+ records,
+ file->cost(&tmp),
+ file->cost(tmp.index_cost),
+ record_count,
+ &records_best_filter);
set_if_smaller(best.records_out, records_best_filter);
if (filter)
- filter= filter->apply_filter(thd, table, &tmp, &records_after_filter,
+ filter= filter->apply_filter(thd, table, &tmp,
+ &records_after_filter,
&startup_cost,
- tmp, index_only_cost,
1, record_count);
}
- tmp= COST_ADD(tmp, records_after_filter * WHERE_COST_THD(thd));
- tmp= COST_MULT(tmp, record_count);
- tmp= COST_ADD(tmp, startup_cost);
+
+ tmp.copy_cost+= records_after_filter * WHERE_COST_THD(thd);
+ cur_cost= file->cost_for_reading_multiple_times(record_count, &tmp);
+ cur_cost= COST_ADD(cur_cost, startup_cost);
+
if (unlikely(trace_access_idx.trace_started()))
{
trace_access_idx.
add("rows", records_after_filter).
- add("cost", tmp);
+ add("cost", cur_cost);
}
/*
The COST_EPS is here to ensure we use the first key if there are
two 'identical keys' that could be used.
*/
- if (tmp + COST_EPS < best.cost)
+ if (cur_cost + COST_EPS < best.cost)
{
trace_access_idx.add("chosen", true);
- best.cost= tmp;
+ best.cost= cur_cost;
/*
We use 'records' instead of 'records_after_filter' here as we want
to have EXPLAIN print the number of rows found by the key access.
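cost_for_reading_multiple_times() is the heart of the IO limiting described in the commit message: CPU costs scale with the number of previous row combinations, while IO is capped at the block counts carried in ALL_READ_COST. A sketch under those assumptions (the field names come from this patch; the body is illustrative, not the actual implementation):

    // Illustrative sketch: cap IO at the block counts, scale CPU linearly.
    double reading_multiple_times_sketch(double count, const ALL_READ_COST *c,
                                         double ms_per_block)
    {
      double io= MY_MIN(c->index_cost.io * count, (double) c->max_index_blocks) +
                 MY_MIN(c->row_cost.io * count, (double) c->max_row_blocks);
      double cpu= (c->index_cost.cpu + c->row_cost.cpu + c->copy_cost) * count;
      return io * ms_per_block + cpu;              // cost in milliseconds
    }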
@@ -8792,10 +8814,11 @@ best_access_path(JOIN *join,
(!(table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2)
{
- double refills, row_copy_cost, cmp_time;
+ double refills, row_copy_cost, cmp_time, cur_cost;
/* Estimate the cost of the hash join access to the table */
double rnd_records= matching_candidates_in_table(s, 0,
use_cond_selectivity);
+ DBUG_ASSERT(rnd_records <= s->found_records);
set_if_smaller(best.records_out, rnd_records);
/*
@@ -8808,16 +8831,16 @@ best_access_path(JOIN *join,
Cost of reading rows through opt_range including comparing the rows
with the attached WHERE clause.
*/
- tmp= s->quick->read_time;
+ cur_cost= s->quick->read_time;
}
else
- tmp= s->cached_scan_and_compare_time;
+ cur_cost= s->cached_scan_and_compare_time;
/* We read the table as many times as join buffer becomes full. */
refills= (1.0 + floor((double) cache_record_length(join,idx) *
record_count /
(double) thd->variables.join_buff_size));
- tmp= COST_MULT(tmp, refills);
+ cur_cost= COST_MULT(cur_cost, refills);
/*
Cost of doing the hash lookup and check all matching rows with the
@@ -8831,9 +8854,9 @@ best_access_path(JOIN *join,
rnd_records * record_count * HASH_FANOUT *
((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
- tmp= COST_ADD(tmp, cmp_time);
+ cur_cost= COST_ADD(cur_cost, cmp_time);
- best.cost= tmp;
+ best.cost= cur_cost;
best.records_read= best.records_after_filter= rows2double(s->records);
best.records= rnd_records;
best.key= hj_start_key;
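A worked instance of the refill estimate above, with illustrative numbers (128-byte cached records, 10000 previous row combinations, the default 256K join_buff_size):

    refills= 1.0 + floor(128 * 10000 / 262144.0)
           = 1.0 + floor(4.88)
           = 5.0        // the scan cost is multiplied by 5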
@@ -8895,13 +8918,13 @@ best_access_path(JOIN *join,
s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2)
best.key && s->quick->index == best.key->key && // (2)
best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2)
- !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
+ !((file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
!table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3)
!(table->force_index_join && best.key && !s->quick) && // (4)
!(best.key && table->pos_in_table_list->jtbm_subselect)) // (5)
{ // Check full join
double records_after_filter, org_records;
- double records_best_filter;
+ double records_best_filter, cur_cost;
Range_rowid_filter_cost_info *filter= 0;
double startup_cost= s->startup_cost;
const char *scan_type= "";
@@ -8929,7 +8952,7 @@ best_access_path(JOIN *join,
access (see first else-branch below), but we don't take it into
account here for range/index_merge access. Find out why this is so.
*/
- tmp= COST_MULT(s->quick->read_time, record_count);
+ cur_cost= COST_MULT(s->quick->read_time, record_count);
/*
Use record count from range optimizer.
@@ -8938,6 +8961,7 @@ best_access_path(JOIN *join,
*/
org_records= records_after_filter= rows2double(s->found_records);
records_best_filter= org_records;
+ set_if_smaller(best.records_out, records_best_filter);
if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
{
@@ -8950,37 +8974,32 @@ best_access_path(JOIN *join,
registers complications when costs are calculated.
*/
DBUG_ASSERT(range->rows == s->found_records);
- DBUG_ASSERT((range->cost == 0.0 && s->quick->read_time == 0.0) ||
- (range->cost / s->quick->read_time <= 1.0000001 &&
- range->cost / s->quick->read_time >= 0.9999999));
-
- filter=
- table->best_range_rowid_filter_for_partial_join(key_no,
- rows2double(range->rows),
- range->find_cost,
- range->index_only_cost,
- record_count,
- &records_best_filter);
+ DBUG_ASSERT((range->cost.total_cost() == 0.0 &&
+ s->quick->read_time == 0.0) ||
+ (range->cost.total_cost() / s->quick->read_time <= 1.0000001 &&
+ range->cost.total_cost() / s->quick->read_time >= 0.9999999));
+
+ range->get_costs(&tmp);
+ filter= table->best_range_rowid_filter(key_no,
+ rows2double(range->rows),
+ file->cost(&tmp),
+ file->cost(tmp.index_cost),
+ record_count,
+ &records_best_filter);
set_if_smaller(best.records_out, records_best_filter);
if (filter)
{
- double filter_cost= range->fetch_cost;
- filter= filter->apply_filter(thd, table, &filter_cost,
+ filter= filter->apply_filter(thd, table, &tmp,
&records_after_filter,
&startup_cost,
- range->fetch_cost,
- range->index_only_cost,
range->ranges,
record_count);
if (filter)
{
- tmp= filter_cost;
- /* Filter returns cost without WHERE_COST */
- tmp= COST_ADD(tmp, records_after_filter *
- WHERE_COST_THD(thd));
- tmp= COST_MULT(tmp, record_count);
- tmp= COST_ADD(tmp, startup_cost);
- startup_cost= 0; // Avoid adding it later
+ tmp.row_cost.cpu+= records_after_filter * WHERE_COST_THD(thd);
+ cur_cost= file->cost_for_reading_multiple_times(record_count, &tmp);
+ cur_cost= COST_ADD(cur_cost, startup_cost);
+ startup_cost= 0; // Avoid adding it again later
table->opt_range[key_no].selectivity= filter->selectivity;
}
}
@@ -8998,6 +9017,7 @@ best_access_path(JOIN *join,
records_best_filter= records_after_filter=
matching_candidates_in_table(s, 0, use_cond_selectivity);
DBUG_ASSERT(records_after_filter <= s->records);
+ DBUG_ASSERT(records_after_filter <= s->found_records);
set_if_smaller(best.records_out, records_after_filter);
@@ -9007,7 +9027,7 @@ best_access_path(JOIN *join,
if (s->cached_forced_index_type)
{
type= s->cached_forced_index_type;
- tmp= s->cached_forced_index_cost;
+ cur_cost= s->cached_forced_index_cost;
forced_index= s->cached_forced_index;
}
else
@@ -9023,42 +9043,42 @@ best_access_path(JOIN *join,
{
/* Use value from estimate_scan_time */
forced_index= s->cached_covering_key;
- tmp= s->cached_scan_and_compare_time;
+ cur_cost= s->cached_scan_and_compare_time;
}
else
{
#ifdef FORCE_INDEX_SHOULD_FORCE_INDEX_SCAN
/* No cached key, use shortest allowed key */
- key_map keys= *table->file->keys_to_use_for_scanning();
+ key_map keys= *file->keys_to_use_for_scanning();
keys.intersect(table->keys_in_use_for_query);
if ((forced_index= find_shortest_key(table, &keys)) < MAX_KEY)
{
- INDEX_READ_COST cost= cost_for_index_read(thd, table,
+ ALL_READ_COST cost= cost_for_index_read(thd, table,
forced_index,
s->records,
s->worst_seeks);
- tmp= cost.read_cost;
+ cur_cost= file->cost(cost);
/* Calculate cost of checking the attached WHERE */
- tmp= COST_ADD(cost.read_cost,
+ cur_cost= COST_ADD(cur_cost,
s->records * WHERE_COST_THD(thd));
}
else
#endif
{
/* No usable key, use table scan */
- tmp= s->cached_scan_and_compare_time;
+ cur_cost= s->cached_scan_and_compare_time;
type= JT_ALL;
}
}
}
else // table scan
{
- tmp= s->cached_scan_and_compare_time;
+ cur_cost= s->cached_scan_and_compare_time;
type= JT_ALL;
}
/* Cache result for other calls */
s->cached_forced_index_type= type;
- s->cached_forced_index_cost= tmp;
+ s->cached_forced_index_cost= cur_cost;
s->cached_forced_index= forced_index;
}
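All of these branches reduce an engine cost to milliseconds through file->cost(). Assuming, per the commit message, that IO_AND_CPU_COST.io now holds a block count, the conversion is conceptually (a sketch, not the actual implementation):

    // Sketch: turn blocks + CPU milliseconds into one comparable number.
    double cost_sketch(IO_AND_CPU_COST c, double disk_read_cost,
                       double disk_read_ratio)
    {
      /* Charge only the share of blocks expected to miss the cache */
      return c.io * disk_read_cost * disk_read_ratio + c.cpu;
    }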
@@ -9078,7 +9098,7 @@ best_access_path(JOIN *join,
If this is not the first table we have to compare the rows against
all previous row combinations
*/
- tmp= COST_MULT(tmp, record_count);
+ cur_cost= COST_MULT(cur_cost, record_count);
}
else
{
@@ -9096,7 +9116,7 @@ best_access_path(JOIN *join,
refills= (1.0 + floor((double) cache_record_length(join,idx) *
(record_count /
(double) thd->variables.join_buff_size)));
- tmp= COST_MULT(tmp, refills);
+ cur_cost= COST_MULT(cur_cost, refills);
/* We come here only if there are already rows in the join cache */
DBUG_ASSERT(idx != join->const_tables);
@@ -9112,14 +9132,14 @@ best_access_path(JOIN *join,
records_after_filter * record_count *
((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
- tmp= COST_ADD(tmp, cmp_time);
+ cur_cost= COST_ADD(cur_cost, cmp_time);
}
}
/* Splitting technique cannot be used with join cache */
if (table->is_splittable())
startup_cost= table->get_materialization_cost();
- tmp+= startup_cost;
+ cur_cost+= startup_cost;
if (unlikely(trace_access_scan.trace_started()))
{
@@ -9129,7 +9149,7 @@ best_access_path(JOIN *join,
add("rows", org_records).
add("rows_after_filter", records_after_filter).
add("rows_out", best.records_out).
- add("cost", tmp);
+ add("cost", cur_cost);
if (type == JT_ALL)
{
trace_access_scan.add("index_only",
@@ -9137,15 +9157,16 @@ best_access_path(JOIN *join,
}
}
- if (tmp + COST_EPS < best.cost)
+ if (cur_cost + COST_EPS < best.cost)
{
/*
If the table has a range (s->quick is set) make_join_select()
will ensure that this will be used
*/
- best.cost= tmp;
+ best.cost= cur_cost;
best.records_read= org_records; // Records accessed
best.records= records_after_filter; // Records to be checked with WHERE
+
/*
If we are using 'use_cond_selectivity > 1' then
table_after_join_selectivity may take into account other
@@ -11019,7 +11040,7 @@ best_extension_by_limited_search(JOIN *join,
trace_one_table
.add("pruned_by_cost", true)
.add("current_cost", current_read_time)
- .add("best_cost", join->best_read + COST_EPS);
+ .add("best_cost", join->best_read);
restore_prev_nj_state(s);
restore_prev_sj_state(remaining_tables, s, idx);
@@ -13355,6 +13376,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
if (!sel->quick_keys.is_subset(tab->checked_keys) ||
!sel->needed_reg.is_subset(tab->checked_keys))
{
+ handler *file= tab->table->file;
/*
"Range checked for each record" is a "last resort" access method
that should only be used when the other option is a cross-product
@@ -13370,9 +13392,7 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
(sel->quick_keys.is_clear_all() ||
(sel->quick &&
sel->quick->read_time >
- tab->table->file->
- ha_scan_and_compare_time(tab->table->file->
- stats.records)))) ?
+                       file->cost(file->ha_scan_and_compare_time(file->stats.records))))) ?
2 : 1;
sel->read_tables= used_tables & ~current_map;
sel->quick_keys.clear_all();
@@ -14237,7 +14257,6 @@ uint check_join_cache_usage(JOIN_TAB *tab,
uint table_index,
JOIN_TAB *prev_tab)
{
- Cost_estimate cost;
uint flags= 0;
ha_rows rows= 0;
uint bufsz= 4096;
@@ -14396,6 +14415,8 @@ uint check_join_cache_usage(JOIN_TAB *tab,
if (!tab->is_ref_for_hash_join() && !no_bka_cache)
{
+ Cost_estimate cost;
+ cost.reset();
flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
if (tab->table->covering_keys.is_set(tab->ref.key))
flags|= HA_MRR_INDEX_ONLY;
@@ -15132,6 +15153,7 @@ void JOIN_TAB::cleanup()
void JOIN_TAB::estimate_scan_time()
{
THD *thd= join->thd;
+ handler *file= table->file;
double copy_cost;
cached_covering_key= MAX_KEY;
@@ -15143,7 +15165,7 @@ void JOIN_TAB::estimate_scan_time()
&startup_cost);
table->opt_range_condition_rows= records;
table->used_stat_records= records;
- copy_cost= table->file->ROW_COPY_COST;
+ copy_cost= file->ROW_COPY_COST;
}
else
{
@@ -15157,12 +15179,13 @@ void JOIN_TAB::estimate_scan_time()
if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
{
cached_covering_key= find_shortest_key(table, &table->covering_keys);
- read_time= table->file->ha_key_scan_time(cached_covering_key, records);
+ read_time= file->cost(file->ha_key_scan_time(cached_covering_key,
+ records));
copy_cost= 0; // included in ha_key_scan_time
}
else
{
- read_time= table->file->ha_scan_time(records);
+ read_time= file->cost(file->ha_scan_time(records));
copy_cost= 0;
}
}
@@ -15179,12 +15202,13 @@ void JOIN_TAB::estimate_scan_time()
else
memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs,
sizeof(tmp_table_optimizer_costs));
- table->file->set_optimizer_costs(thd);
- table->s->optimizer_costs_inited=1 ;
+ file->set_optimizer_costs(thd);
+ table->s->optimizer_costs_inited=1;
records= table->stat_records();
DBUG_ASSERT(table->opt_range_condition_rows == records);
- read_time= table->file->ha_scan_time(MY_MAX(records, 1000)); // Needs fix..
+ // Needs fix..
+  read_time= file->cost(file->ha_scan_time(MY_MAX(records, 1000)));
copy_cost= table->s->optimizer_costs.row_copy_cost;
}
@@ -30253,7 +30277,7 @@ static bool get_range_limit_read_cost(const POSITION *pos,
full index scan/cost.
*/
double best_rows, range_rows;
- double range_cost= (double) table->opt_range[keynr].fetch_cost;
+ double range_cost= (double) table->opt_range[keynr].cost.fetch_cost();
best_rows= range_rows= (double) table->opt_range[keynr].rows;
if (pos)
@@ -30309,12 +30333,12 @@ static bool get_range_limit_read_cost(const POSITION *pos,
N/(refkey_rows_estimate/table_records) > table_records
<=> N > refkey_rows_estimate.
*/
- INDEX_READ_COST cost= cost_for_index_read(table->in_use, table, keynr,
- rows_to_scan,
- pos ?
- (ha_rows) pos->table->worst_seeks :
- HA_ROWS_MAX);
- *read_cost= (cost.read_cost +
+ ALL_READ_COST cost= cost_for_index_read(table->in_use, table, keynr,
+ rows_to_scan,
+ pos ?
+ (ha_rows) pos->table->worst_seeks :
+ HA_ROWS_MAX);
+ *read_cost= (table->file->cost(&cost) +
rows_to_scan * WHERE_COST_THD(table->in_use));
*read_rows= rows2double(rows_to_scan);
return 0;
@@ -30449,7 +30473,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
{
/* Probably an update or delete. Assume we will do a full table scan */
fanout= 1.0;
- read_time= table->file->ha_scan_and_compare_time(rows_estimate);
+ read_time= table->file->cost(table->file->ha_scan_and_compare_time(rows_estimate));
set_if_smaller(select_limit_arg, table_records);
}
diff --git a/sql/sql_test.cc b/sql/sql_test.cc
index f50452a9d6c..6c2bbedef6d 100644
--- a/sql/sql_test.cc
+++ b/sql/sql_test.cc
@@ -383,7 +383,7 @@ void print_sjm(SJ_MATERIALIZATION_INFO *sjm)
}
fprintf(DBUG_FILE, " }\n");
fprintf(DBUG_FILE, " materialize_cost= %g\n",
- sjm->materialization_cost.total_cost());
+ sjm->materialization_cost);
fprintf(DBUG_FILE, " rows= %g\n", sjm->rows);
fprintf(DBUG_FILE, "}\n");
DBUG_UNLOCK_FILE;
diff --git a/sql/table.cc b/sql/table.cc
index 57844e7734c..0e196b64379 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -10460,17 +10460,26 @@ bool TABLE::export_structure(THD *thd, Row_definition_list *defs)
inline void TABLE::initialize_opt_range_structures()
{
TRASH_ALLOC((void*)&opt_range_keys, sizeof(opt_range_keys));
- TRASH_ALLOC(opt_range, s->keys * sizeof(*opt_range));
+ TRASH_ALLOC((void*)opt_range, s->keys * sizeof(*opt_range));
TRASH_ALLOC(const_key_parts, s->keys * sizeof(*const_key_parts));
}
double TABLE::OPT_RANGE::index_only_fetch_cost(TABLE *table)
{
- return (index_only_cost +
+  return (table->file->cost(cost.index_cost) +
(double) rows * table->s->optimizer_costs.key_copy_cost);
}
+void TABLE::OPT_RANGE::get_costs(ALL_READ_COST *res)
+{
+ res->index_cost= cost.index_cost;
+ res->row_cost= cost.row_cost;
+ res->copy_cost= cost.copy_cost;
+ res->max_index_blocks= max_index_blocks;
+ res->max_row_blocks= max_row_blocks;
+}
+
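Typical use of the new accessor, condensed from the best_access_path() hunks above:

    ALL_READ_COST tmp;
    table->opt_range[key].get_costs(&tmp);          // copy the cached costs
    tmp.copy_cost+= records_after_filter * WHERE_COST_THD(thd);
    cur_cost= file->cost_for_reading_multiple_times(record_count, &tmp);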
/*
Mark table to be reopened after query
diff --git a/sql/table.h b/sql/table.h
index edeeb6e6241..218fb0e8104 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1394,27 +1394,8 @@ public:
{
uint key_parts;
uint ranges;
- ha_rows rows;
- /*
- The full cost of using 'range'. Includes fetching the rows
- through keys, copying them and comparing the rows aginst the
- WHERE clause.
- */
- double cost;
- /*
- Cost of finding the key and fetching the row with row id.
- In case of clustered keys or covering keys the fetch of the row is
- not counted for.
- */
- double find_cost;
- /* find_cost + cost of copying the rows to record */
- double fetch_cost;
- /*
- Cost of fetching the keys, not including copying the keys to
- record or comparing them with the WHERE clause. Used only when
- working with filters.
- */
- double index_only_cost;
+ ha_rows rows, max_index_blocks, max_row_blocks;
+ Cost_estimate cost;
/* Selectivity, in case of filters */
double selectivity;
bool first_key_part_has_only_one_value;
@@ -1424,6 +1405,7 @@ public:
sql level.
*/
double index_only_fetch_cost(TABLE *table);
+ void get_costs(ALL_READ_COST *cost);
} *opt_range;
/*
Bitmaps of key parts that =const for the duration of join execution. If
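The removed doubles are now derived from the single Cost_estimate instead of being stored side by side; for example, the two costs that callers in this patch still need are obtained as:

    double fetch= table->opt_range[keynr].cost.fetch_cost();
    double index_only= table->opt_range[keynr].index_only_fetch_cost(table);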
@@ -1818,12 +1800,12 @@ public:
void prune_range_rowid_filters();
void trace_range_rowid_filters(THD *thd) const;
Range_rowid_filter_cost_info *
- best_range_rowid_filter_for_partial_join(uint access_key_no,
- double records,
- double fetch_cost,
- double index_only_cost,
- double prev_records,
- double *records_out);
+ best_range_rowid_filter(uint access_key_no,
+ double records,
+ double fetch_cost,
+ double index_only_cost,
+ double prev_records,
+ double *records_out);
/**
System Versioning support
*/
diff --git a/sql/uniques.cc b/sql/uniques.cc
index 8555fc21624..1886ad278da 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -348,7 +348,7 @@ double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
First, add cost of writing all trees to disk, assuming that all disk
writes are sequential.
*/
- disk_read_cost= DISK_READ_COST_THD(thd);
+ disk_read_cost= default_optimizer_costs.disk_read_cost;
result += disk_read_cost * n_full_trees *
ceil(((double) key_size)*max_elements_in_tree / DISK_CHUNK_SIZE);
result += disk_read_cost * ceil(((double) key_size)*last_tree_elems / DISK_CHUNK_SIZE);
@@ -365,8 +365,7 @@ double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
Add cost of reading the resulting sequence, assuming there were no
duplicate elements.
*/
- result+= (ceil((double)key_size*nkeys/IO_SIZE) *
- default_optimizer_costs.disk_read_cost);
+ result+= (ceil((double)key_size*nkeys/IO_SIZE) * disk_read_cost);
return result;
}
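A worked instance of the write-cost term above, with illustrative values (key_size=8, max_elements_in_tree=100000, DISK_CHUNK_SIZE=65536, disk_read_cost=0.010 ms, n_full_trees=3):

    chunks_per_tree= ceil(8 * 100000 / 65536.0) = 13
    write_cost= 0.010 * 3 * 13                  = 0.39   // milliseconds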
diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h
index a5b139a5689..c83584a62e4 100644
--- a/storage/connect/ha_connect.h
+++ b/storage/connect/ha_connect.h
@@ -309,7 +309,7 @@ public:
Called in test_quick_select to determine if indexes should be used.
*/
virtual IO_AND_CPU_COST scan_time()
- { return { 0, (double) (stats.records+stats.deleted) * avg_io_cost() }; };
+ { return { 0, (double) (stats.records+stats.deleted) * DISK_READ_COST }; };
/** @brief
This method will never be called if you do not implement indexes.
diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h
index 5a56dc6c4dd..856bb789320 100644
--- a/storage/csv/ha_tina.h
+++ b/storage/csv/ha_tina.h
@@ -126,9 +126,9 @@ public:
*/
virtual IO_AND_CPU_COST scan_time()
{
- return { (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE *
- avg_io_cost(),
- (stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
+    return
+      { (double) (share->saved_data_file_length + IO_SIZE - 1) / IO_SIZE,
+        (stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
}
/* The next method will never be called */
virtual bool fast_key_read() { return 1;}
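The ha_tina change shows the convention every engine now follows: IO_AND_CPU_COST.io is a count of IO_SIZE blocks (the server converts it to time via cost()), while .cpu is already in milliseconds. A template for a file-based engine, with file_length standing in for the engine's own size statistic:

    IO_AND_CPU_COST scan_time() override
    {
      return { (double) (file_length + IO_SIZE - 1) / IO_SIZE,   // blocks
               (double) stats.records * ROW_NEXT_FIND_COST };    // ms
    }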
diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h
index 3b11945b182..92acce5b7bb 100644
--- a/storage/example/ha_example.h
+++ b/storage/example/ha_example.h
@@ -156,7 +156,7 @@ public:
{
IO_AND_CPU_COST cost;
/* 0 blocks, 0.001 ms / row */
- cost.io= (double) (stats.records+stats.deleted) * avg_io_cost();
+ cost.io= (double) (stats.records+stats.deleted) * DISK_READ_COST;
cost.cpu= 0;
return cost;
}
@@ -168,7 +168,7 @@ public:
ulonglong blocks)
{
IO_AND_CPU_COST cost;
- cost.io= blocks * avg_io_cost();
+ cost.io= blocks * DISK_READ_COST;
cost.cpu= (double) rows * 0.001;
return cost;
}
@@ -181,7 +181,7 @@ public:
IO_AND_CPU_COST cost;
/* 0 blocks, 0.001 ms / row */
cost.io= 0;
- cost.cpu= (double) rows * avg_io_cost();
+ cost.cpu= (double) rows * DISK_READ_COST;
return cost;
}
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h
index 35e5f5c8215..317271f60b4 100644
--- a/storage/federated/ha_federated.h
+++ b/storage/federated/ha_federated.h
@@ -186,20 +186,20 @@ public:
DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
return
{
- (double) (stats.mean_rec_length * stats.records)/IO_SIZE * avg_io_cost(),
- 0
+ 0,
+      (double) (stats.mean_rec_length * stats.records) / 8192 *
+      DISK_READ_COST + 1000,
};
}
- IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
- {
- return { (double) stats.records * avg_io_cost(), 0 };
- }
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
ulonglong blocks)
{
- return { (double) (ranges + rows) * avg_io_cost(), 0 };
+ return {0, (double) (ranges + rows) * DISK_READ_COST };
+ }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+ {
+ return {0, (double) rows * DISK_READ_COST };
}
-
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
/*
Everything below are methods that we implment in ha_federated.cc.
diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h
index beebf405686..6876db5cbb8 100644
--- a/storage/federatedx/ha_federatedx.h
+++ b/storage/federatedx/ha_federatedx.h
@@ -364,26 +364,26 @@ public:
Talk to Kostja about this - how to get the
number of rows * ...
disk scan time on other side (block size, size of the row) + network time ...
- The reason for "records * 1000" is that such a large number forces
- this to use indexes "
+    The reason for the added 1000 is that such a large number forces the optimizer to use indexes "
*/
IO_AND_CPU_COST scan_time()
{
DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
return
{
- (double) (stats.mean_rec_length * stats.records)/8192 * avg_io_cost(),
- 0
+ 0,
+      (double) (stats.mean_rec_length * stats.records) / 8192 *
+      DISK_READ_COST + 1000,
};
}
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
ulonglong blocks)
{
- return { (double) (ranges + rows) * avg_io_cost(), 0 };
+ return {0, (double) (ranges + rows) * DISK_READ_COST };
}
IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{
- return { (double) rows * avg_io_cost(), 0 };
+ return {0, (double) rows * DISK_READ_COST };
}
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index 74a0a00a04c..663e75fc0fe 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -67,7 +67,6 @@ public:
ulonglong blocks) override;
IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
/* 0 for avg_io_cost ensures that there are no read-block calculations */
- double avg_io_cost() override { return 0.0; }
int open(const char *name, int mode, uint test_if_locked);
int close(void);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index fc20003c7f5..10de7f54603 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -14337,7 +14337,7 @@ ha_innobase::scan_time()
TODO: This will be further improved to return some approximate
estimate but that would also needs pre-population of stats
structure. As of now approach is in sync with MyISAM. */
- return { (ulonglong2double(stats.data_file_length) / IO_SIZE * avg_io_cost()), 0.0 };
+ return { (ulonglong2double(stats.data_file_length) / IO_SIZE * DISK_READ_COST), 0.0 };
}
ulint stat_clustered_index_size;
@@ -14347,7 +14347,7 @@ ha_innobase::scan_time()
stat_clustered_index_size =
m_prebuilt->table->stat_clustered_index_size;
- cost.io= (double) stat_clustered_index_size * avg_io_cost();
+ cost.io= (double) stat_clustered_index_size * DISK_READ_COST;
cost.cpu= 0;
return(cost);
}
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index 6ccf29c7042..a5e35540c55 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -106,7 +106,7 @@ public:
{
IO_AND_CPU_COST cost;
cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE +
- file->tables) * avg_io_cost();
+              file->tables);
cost.cpu= records() * ROW_NEXT_FIND_COST;
return cost;
}
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 1dbf98245f7..7c1af7217ca 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -14660,6 +14660,15 @@ IO_AND_CPU_COST ha_rocksdb::keyread_time(uint index, ulong ranges,
DBUG_RETURN(cost);
}
+
+ulonglong ha_rocksdb::index_blocks(uint index, uint ranges, ha_rows rows)
+{
+ size_t len= table->key_storage_length(index);
+  ulonglong blocks= (rows * len / 4) / stats.block_size + ranges; // assume 75 % compression
+ return blocks * stats.block_size / IO_SIZE;
+}
+
+
void ha_rocksdb::print_error(int error, myf errflag) {
if (error == HA_ERR_ROCKSDB_STATUS_BUSY) {
error = HA_ERR_LOCK_DEADLOCK;
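Plugging illustrative numbers into index_blocks() above (rows=10000, key storage length 64, one range, block_size=16384, IO_SIZE=4096):

    blocks= (10000 * 64 / 4) / 16384 + 1 = 10   // engine blocks after compression
    return  10 * 16384 / 4096            = 40   // expressed in IO_SIZE units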
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 2cfaf305682..f05f373cbfd 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -623,7 +623,7 @@ public:
bool sorted) override
MY_ATTRIBUTE((__warn_unused_result__));
- virtual IO_AND_CPU_COST scan_time() override
+ IO_AND_CPU_COST scan_time() override
{
IO_AND_CPU_COST cost;
DBUG_ENTER_FUNC();
@@ -634,7 +634,8 @@ public:
IO_AND_CPU_COST keyread_time(uint index, ulong ranges,
ha_rows rows, ulonglong blocks) override;
- virtual void print_error(int error, myf errflag) override;
+ ulonglong index_blocks(uint index, uint ranges, ha_rows rows) override;
+ void print_error(int error, myf errflag) override;
int open(const char *const name, int mode, uint test_if_locked) override
MY_ATTRIBUTE((__warn_unused_result__));
diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc
index eb79d25630c..fd95a897a46 100644
--- a/storage/sequence/sequence.cc
+++ b/storage/sequence/sequence.cc
@@ -86,7 +86,17 @@ public:
void position(const uchar *record);
int rnd_pos(uchar *buf, uchar *pos);
int info(uint flag);
-
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override
+ {
+ /* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
+ return {0,0};
+ }
+  IO_AND_CPU_COST scan_time() override
+ {
+ /* Avoids assert in total_cost() and makes DBUG_PRINT more consistent */
+ return {0, 0};
+ }
/* indexes */
ulong index_flags(uint inx, uint part, bool all_parts) const
{ return HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER |
@@ -100,7 +110,6 @@ public:
int index_last(uchar *buf);
ha_rows records_in_range(uint inx, const key_range *start_key,
const key_range *end_key, page_range *pages);
- double avg_io_cost() override { return 0.0; }
private:
void set(uchar *buf);
@@ -492,10 +501,14 @@ int ha_seq_group_by_handler::next_row()
static void sequence_update_optimizer_costs(OPTIMIZER_COSTS *costs)
{
+ costs->disk_read_cost= 0;
costs->disk_read_ratio= 0.0; // No disk
- costs->key_next_find_cost= costs->key_lookup_cost=
- costs->key_copy_cost= costs->row_lookup_cost=
- costs->row_copy_cost= 0.0000062391530550;
+ costs->key_next_find_cost=
+ costs->key_lookup_cost=
+ costs->key_copy_cost=
+ costs->row_next_find_cost=
+ costs->row_lookup_cost=
+ costs->row_copy_cost= 0.0000062391530550;
}
/*****************************************************************************
diff --git a/storage/sphinx/ha_sphinx.h b/storage/sphinx/ha_sphinx.h
index 0b3883f107c..f5651fc6eb5 100644
--- a/storage/sphinx/ha_sphinx.h
+++ b/storage/sphinx/ha_sphinx.h
@@ -76,7 +76,7 @@ public:
{
IO_AND_CPU_COST cost;
cost.io= 0;
- cost.cpu= (double) (stats.records+stats.deleted) * avg_io_cost();
+ cost.cpu= (double) (stats.records+stats.deleted) * DISK_READ_COST;
return cost;
}
IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,