-rw-r--r-- Docs/optimizer_costs.txt 1160
-rw-r--r-- client/mysql.cc 58
-rw-r--r-- include/my_getopt.h 3
-rw-r--r-- include/my_global.h 1
-rw-r--r-- include/my_tracker.h 41
-rw-r--r-- include/myisam.h 2
-rw-r--r-- mysql-test/include/analyze-format.inc 2
-rw-r--r-- mysql-test/main/analyze_format_json.result 26
-rw-r--r-- mysql-test/main/analyze_stmt_orderby.result 6
-rw-r--r-- mysql-test/main/ctype_collate.result 2
-rw-r--r-- mysql-test/main/delete.result 1
-rw-r--r-- mysql-test/main/except.result 12
-rw-r--r-- mysql-test/main/except_all.result 12
-rw-r--r-- mysql-test/main/fetch_first.result 3
-rw-r--r-- mysql-test/main/fulltext_order_by.test 1
-rw-r--r-- mysql-test/main/information_schema_all_engines.result 8
-rw-r--r-- mysql-test/main/intersect.result 6
-rw-r--r-- mysql-test/main/intersect_all.result 6
-rw-r--r-- mysql-test/main/key.result 6
-rw-r--r-- mysql-test/main/myisam.result 10
-rw-r--r-- mysql-test/main/mysqld--help.result 45
-rw-r--r-- mysql-test/main/opt_trace_security.result 36
-rw-r--r-- mysql-test/main/opt_trace_ucs2.result 2
-rw-r--r-- mysql-test/main/rowid_filter_innodb.result 410
-rw-r--r-- mysql-test/main/show_explain_json.result 6
-rw-r--r-- mysql-test/main/sp.result 2
-rw-r--r-- mysql-test/main/status.result 18
-rw-r--r-- mysql-test/main/table_elim.result 11
-rw-r--r-- mysql-test/main/type_ranges.result 2
-rw-r--r-- mysql-test/main/type_time_6065.result 2
-rw-r--r-- mysql-test/main/user_var.result 4
-rw-r--r-- mysys/my_getopt.c 6
-rw-r--r-- sql/CMakeLists.txt 2
-rw-r--r-- sql/filesort.cc 4
-rw-r--r-- sql/filesort_utils.cc 23
-rw-r--r-- sql/filesort_utils.h 1
-rw-r--r-- sql/ha_partition.cc 127
-rw-r--r-- sql/ha_partition.h 13
-rw-r--r-- sql/handler.cc 219
-rw-r--r-- sql/handler.h 246
-rw-r--r-- sql/item_func.cc 2
-rw-r--r-- sql/json_table.cc 5
-rw-r--r-- sql/keycaches.cc 141
-rw-r--r-- sql/keycaches.h 2
-rw-r--r-- sql/multi_range_read.cc 110
-rw-r--r-- sql/mysqld.cc 107
-rw-r--r-- sql/mysqld.h 17
-rw-r--r-- sql/opt_range.cc 121
-rw-r--r-- sql/opt_split.cc 35
-rw-r--r-- sql/opt_subselect.cc 92
-rw-r--r-- sql/opt_subselect.h 8
-rw-r--r-- sql/opt_trace.cc 4
-rw-r--r-- sql/optimizer_costs.h 216
-rw-r--r-- sql/optimizer_defaults.h 183
-rw-r--r-- sql/rowid_filter.cc 19
-rw-r--r-- sql/rowid_filter.h 17
-rw-r--r-- sql/set_var.cc 8
-rw-r--r-- sql/set_var.h 2
-rw-r--r-- sql/sql_bitmap.h 12
-rw-r--r-- sql/sql_class.cc 1
-rw-r--r-- sql/sql_class.h 14
-rw-r--r-- sql/sql_const.h 14
-rw-r--r-- sql/sql_explain.cc 9
-rw-r--r-- sql/sql_explain.h 4
-rw-r--r-- sql/sql_join_cache.cc 2
-rw-r--r-- sql/sql_plugin.h 1
-rw-r--r-- sql/sql_select.cc 769
-rw-r--r-- sql/sql_select.h 42
-rw-r--r-- sql/sql_show.cc 68
-rw-r--r-- sql/sql_yacc.yy 2
-rw-r--r-- sql/sys_vars.cc 152
-rw-r--r-- sql/sys_vars.inl 141
-rw-r--r-- sql/table.cc 45
-rw-r--r-- sql/table.h 11
-rw-r--r-- sql/uniques.cc 31
-rw-r--r-- sql/uniques.h 2
-rw-r--r-- storage/archive/ha_archive.cc 60
-rw-r--r-- storage/archive/ha_archive.h 4
-rw-r--r-- storage/connect/ha_connect.h 11
-rw-r--r-- storage/csv/ha_tina.h 7
-rw-r--r-- storage/example/ha_example.h 33
-rw-r--r-- storage/federated/ha_federated.cc 24
-rw-r--r-- storage/federated/ha_federated.h 19
-rw-r--r-- storage/federatedx/ha_federatedx.cc 23
-rw-r--r-- storage/federatedx/ha_federatedx.h 18
-rw-r--r-- storage/heap/ha_heap.cc 59
-rw-r--r-- storage/heap/ha_heap.h 27
-rw-r--r-- storage/innobase/btr/btr0cur.cc 5
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 53
-rw-r--r-- storage/innobase/handler/ha_innodb.h 8
-rw-r--r-- storage/maria/ha_maria.cc 47
-rw-r--r-- storage/maria/ha_maria.h 4
-rw-r--r-- storage/maria/ma_pagecache.c 2
-rw-r--r-- storage/mroonga/ha_mroonga.cpp 71
-rw-r--r-- storage/mroonga/ha_mroonga.hpp 15
-rw-r--r-- storage/myisam/ha_myisam.cc 28
-rw-r--r-- storage/myisam/ha_myisam.h 5
-rw-r--r-- storage/myisammrg/ha_myisammrg.cc 34
-rw-r--r-- storage/myisammrg/ha_myisammrg.h 14
-rw-r--r-- storage/oqgraph/ha_oqgraph.h 7
-rw-r--r-- storage/perfschema/ha_perfschema.h 6
-rw-r--r-- storage/rocksdb/ha_rocksdb.cc 9
-rw-r--r-- storage/rocksdb/ha_rocksdb.h 13
-rw-r--r-- storage/sequence/sequence.cc 12
-rw-r--r-- storage/sphinx/ha_sphinx.h 30
-rw-r--r-- storage/spider/ha_spider.cc 49
-rw-r--r-- storage/spider/ha_spider.h 10
-rw-r--r-- storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test 4
-rw-r--r-- storage/spider/spd_table.cc 20
-rwxr-xr-x tests/check_costs.pl 1005
110 files changed, 5180 insertions, 1510 deletions
diff --git a/Docs/optimizer_costs.txt b/Docs/optimizer_costs.txt
new file mode 100644
index 00000000000..052c8e3e72a
--- /dev/null
+++ b/Docs/optimizer_costs.txt
@@ -0,0 +1,1160 @@
+This file is intended to explain some of the optimizer cost variables
+in MariaDB 10.11.
+
+Background
+==========
+
+Most timings have come from running:
+
+./check_costs.pl --rows=1000000 --socket=/tmp/mysql-dbug.sock --comment="--aria-pagecache-buffer-size=10G --innodb-buffer_pool_size=10G --key_buffer-size=1G --max-heap-table-size=10G"
+
+The MariaDB server is started with the options:
+--aria-pagecache-buffer-size=10G --innodb-buffer_pool_size=10G --key_buffer-size=1G --max-heap-table-size=10G
+
+- All costs are changed to be in milliseconds for engine operations and
+  other calculations, like the WHERE clause. This is a big change from
+  before the patch that added this file, where the basic cost unit was one
+  disk seek or one index read and we assumed they had the same cost.
+- I am using Aria as the 'base' cost. This is because it caches all data,
+ which most other engines also would do.
+- MyISAM cannot be used as 'base' as it does not cache row data (which gives
+ a high overhead when doing row lookups).
+- Heap is in memory and a bit too special (no caching).
+- InnoDB is a clustered engine where secondary indexes have to use
+ the clustered index to find a row (not a common case among storage engines).
+
+The old assumption in the optimizer has 'always' been that
+1 cost = 1 seek = 1 index lookup = 1 row lookup = 0.10ms.
+However, 1 seek != 1 index or row lookup, and this has not been reflected
+in most other costs.
+This document is the basis for changing things so that 1 cost = 1ms.
+
+
+Setup
+=====
+
+All timings are calculated based on result from this computer:
+CPU: Intel(R) Xeon(R) W-2295 CPU @ 3.00GHz
+Memory: 256G
+Disk: Samsung SSD 860 (not really relevant in this case)
+Rows in tests: 1M. Each test is run 3 times
+(one run to cache the data and 2 runs of which we take the average).
+
+The assumption is that other computers will have somewhat proportional
+timings. The timings are done with all data in memory (except MyISAM rows).
+This is reflected in the costs for the test by setting
+optimizer_disk_read_ratio=0.
+
+Note that even on a single Linux computer without any notable tasks
+running, the run time varies a bit from run to run (up to 4%), so the
+numbers in this document cannot be repeated exactly but should be good
+enough for the optimizer.
+
+Timings for disk accesses on other systems can be changed by setting
+optimizer_disk_read_cost (usec / 4096 bytes) to match the read speed.
+
+Default values for check_costs.pl:
+optimizer_disk_read_ratio= 0 Everything is cached
+SCAN_LOOKUP_COST=1 Cost modifier for scan (for end user)
+set @@optimizer_switch='index_condition_pushdown=off'
+
+
+ROW_COPY_COST and KEY_COPY_COST
+===============================
+
+Regarding ROW_COPY_COST:
+When calculating the cost of fetching a row, we have two alternative cost
+parts (in addition to other costs):
+scanning: rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)
+rnd_pos: rows * (ROW_LOOKUP_COST + ROW_COPY_COST)
+
+In theory we could remove ROW_COPY_COST and just move the cost
+to the two other variables. However, in the future there may be a reason
+to be able to modify row_copy_cost per table depending on the number and
+type of fields (a table with 1000 fields should have a higher row copy cost than
+a table with 1 field). Because of this, I prefer to keep ROW_COPY_COST
+around for now.
+
+Regarding KEY_COPY_COST:
+When calculating the cost of fetching a key, we have as part of the cost:
+keyread_time: rows * KEY_COPY_COST + ranges * KEY_LOOKUP_COST +
+ (rows-ranges) * KEY_NEXT_FIND_COST
+key_scan_time: rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)
+
+We could remove KEY_COPY_COST by adding it to KEY_LOOKUP_COST and
+KEY_NEXT_FIND_COST but I prefer to keep it with the same argument as
+for ROW_COPY_COST.
+
+The ratio KEY_COPY_COST / (KEY_NEXT_FIND_COST + KEY_COPY_COST)
+is assumed to be 0.1577 (see the analysis in the appendix).
+
+There is a relationship between the above costs in that for a clustered
+index the cost is calculated as ha_keyread_time() + ROW_COPY_COST.
+
+
+Preamble
+=========
+
+I tried first to use performance schema to get costs, but I was not
+successful as all timings I got for tables showed the total time
+executing the statement, not the timing for doing the actual reads.
+Also the overhead of performance schema affected the results
+
+With --performance-schema=on
+
+MariaDB [test]> select sum(1) from seq_1_to_100000000;
++-----------+
+| sum(1) |
++-----------+
+| 100000000 |
++-----------+
+1 row in set (4.950 sec)
+
+Performance schema overhead: 30.1%
+
+With:
+UPDATE performance_schema.setup_consumers SET ENABLED = 'YES';
+UPDATE performance_schema.setup_instruments SET ENABLED = 'YES', TIMED = 'YES';
+
+Flush with:
+CALL sys.ps_truncate_all_tables(FALSE);
+
+Performance schema overhead now: 32.9%
+
+Timings from:
+select * from events_statements_current where thread_id=80;
+
+MariaDB [test]> select 885402302809000-884884140290000;
++---------------------------------+
+| 885402302809000-884884140290000 |
++---------------------------------+
+| 518162519000 |
++---------------------------------+
+-> Need to divide by 1000000000000.0 to get seconds
+
+As seen above, this gives the total statement time, not the time
+spent accessing the tables.
+
+In the end, I decided to use ANALYZE to find out the cost of the table
+actions:
+
+For example: Finding out table scan timing (and thus costs):
+
+analyze format=json select sum(1) from seq_1_to_100000000;
+r_table_time_ms": 1189.239022
+
+
+Calculating 'optimizer_where_cost'
+==================================
+
+To make the WHERE cost reasonable (not too low) we assume there are
+2 simple conditions in the default 'WHERE clause'.
+
+MariaDB [test]> select benchmark(100000000,l_commitDate >= '2000-01-01' and l_tax >= 0.0) from test.check_costs limit 1;
++--------------------------------------------------------------------+
+| benchmark(100000000,l_commitDate >= '2000-01-01' and l_tax >= 0.0) |
++--------------------------------------------------------------------+
+| 0 |
++--------------------------------------------------------------------+
+1 row in set (3.198 sec)
+
+Time of where in seconds: 3.198 / 100000000 (100,000,000)
+
+Verification:
+
+select sum(1) from seq_1_to_100000000 where seq>=0.0 and seq>=-1.0;
++-----------+
+| sum(1) |
++-----------+
+| 100000000 |
++-----------+
+1 row in set (8.564 sec)
+
+MariaDB [test]> select sum(1) from seq_1_to_100000000;
++-----------+
+| sum(1) |
++-----------+
+| 100000000 |
++-----------+
+1 row in set (5.162 sec)
+
+Time of where= (8.564-5.162)/100000000 = 3.402/100000000 (100,000,000)
+(Result good enough, as these are slightly different computations)
+
+check_costs.pl provides these numbers when using heap tables and 1M rows:
+
+simple where: 118.689 ms
+complex where: 138.474 ms
+no where: 83.699 ms
+
+Which gives for simple where:
+(118.689-83.699)/1000000 = 0.00003499 ms (0.035 usec) per row
+Which is in the same ballpark.
+
+We use the result from the select benchmark run as this has the least
+overhead and is easiest to repeat and verify in a test.
+Which gives:
+optimizer_where_cost= 0.032 usec (0.000032 ms) per WHERE.
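+
+The arithmetic above can be reproduced with a few lines of Python (the only
+input is the measured benchmark time from this section):
+
+  benchmark_seconds = 3.198           # time for 100,000,000 WHERE evaluations
+  evaluations = 100_000_000
+  per_where_ms = benchmark_seconds / evaluations * 1000
+  print(per_where_ms)                 # ~3.2e-05 ms, i.e. ~0.032 usec per WHERE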
+
+
+HEAP TABLE SCAN & ROW_COPY_COST
+===============================
+
+We start with heap as all rows are in memory and we don't have to take
+disk reads into account.
+
+select sum(l_partkey) from test.check_costs
+table_scan ms: 10.02078736
+rows: 1000000
+
+Cost should be 10.02078736 (scan cost) + 32 (where cost)
+
+cost= scan_time() * optimizer_cache_cost * SCAN_LOOKUP_COST +
+ TABLE_SCAN_SETUP_COST +
+ records * (ROW_COPY_COST + ROW_LOOKUP_COST + WHERE_COMPARE_COST);
+
+=>
+We are ignoring TABLE_SCAN_SETUP_COST (which exists just to prefer index
+lookups on small tables).
+We can also ignore records * WHERE_COMPARE_COST as we don't have that
+in the above calculated 'ms'.
+row_costs= (ROW_COPY_COST + ROW_LOOKUP_COST)
+
+cost= scan_time() * 1 * 1 +
+ 1000000.0 * (row_costs)
+=>
+cost= time_per_row*1000000 + row_costs * 1000000;
+=>
+time_per_row+row_cost= cost/1000000
+
+Let's assume that for heap, finding the next row is 80 % of the time and
+copying the row (a memcpy) to the upper level is the remaining 20 %.
+(This is not really important, we could put everything in heap_scan_time,
+but it's good to have the data split as it gives us more options to
+experiment with later.)
+
+row_lookup_cost= 10.02078736/1000000*0.8 = 8.0166298880000005e-06
+row_copy_cost= 10.02078736/1000000*0.2 = 2.0041574720000001e-06
+
+Conclusion:
+heap_scan_time= 8.0166e-06
+row_copy_cost= 2.0042e-06
+
+Heap doesn't support key only read, so key_copy_cost is not relevant for it.
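+
+The same split as a small Python sketch (the measured scan time and the
+assumed 80/20 split are the only inputs):
+
+  scan_ms, rows = 10.02078736, 1_000_000
+  per_row = scan_ms / rows              # total cost per row in ms
+  row_next_find = per_row * 0.8         # assumed: 80% is finding the next row
+  row_copy_cost = per_row * 0.2         # assumed: 20% is copying the row
+  print(row_next_find, row_copy_cost)   # ~8.0166e-06 and ~2.0042e-06 ms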
+
+
+HEAP INDEX SCAN
+===============
+
+select count(*) from test.check_costs_heap force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0
+index_scan time: 79.7286117 ms
+
+Index scan on heap tables can only happen with binary trees.
+l_supp_key is using a binary tree.
+
+cost= (ranges + rows + 1) * BTREE_KEY_NEXT_FIND_COST + rows * row_copy_cost=
+(for large number of rows):
+rows * (BTREE_KEY_NEXT_FIND_COST + row_copy_cost)
+
+BTREE_KEY_NEXT_FIND_COST= cost/rows - row_copy_cost =
+79.7286117/1000000- 2.334e-06= 0.0000773946117
+
+
+HEAP EQ_REF
+===========
+
+select straight_join count(*) from seq_1_to_1000000,test.check_costs_heap where seq=l_linenumber
+eq_ref_index_join time: 175.874165 of which 12.57 is from seq_1_to_1000000
+
+Note: This is 34% of the cost of an Aria table with index lookup and
+ 20% of an Aria table with full key+row lookup.
+
+cost= rows * (key_lookup_cost + row_copy_cost)
+key_lookup_cost= cost/rows - row_copy_cost =
+(175.874165-12.57)/1000000 - 2.334e-06 = 0.00016097016500000002
+
+
+HEAP EQ_REF on binary tree index
+================================
+
+select straight_join count(*) from seq_1_to_1000000,test.check_costs_heap where seq=l_extra and l_partkey >= 0
+eq_ref_join time: 241.350539 ms of which 12.57 is from seq_1_to_1000000
+
+rows * (tree_find_cost() + row_copy_cost) =
+
+tree_find_cost()= cost/rows - row_copy_cost =
+
+(241.350539-12.57)/1000000 - 2.334e-06= 0.000226446539
+
+tree_find_cost() is defined as key_compare_cost * log2(table_rows)
+->
+key_compare_cost= 0.000226446539/log2(1000000) = 0.000011361200108882259;
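+
+The same derivation in Python (row_copy_cost is the value used in this
+section):
+
+  import math
+  total_ms, seq_ms, rows = 241.350539, 12.57, 1_000_000
+  row_copy_cost = 2.334e-06
+  tree_find_cost = (total_ms - seq_ms) / rows - row_copy_cost
+  key_compare_cost = tree_find_cost / math.log2(rows)
+  print(tree_find_cost, key_compare_cost)  # ~0.00022645 and ~1.1361e-05 ms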
+
+
+SEQUENCE SCAN
+=============
+
+analyze format=json select sum(seq+1) from seq_1_to_1000000;
+r_table_time_ms: 12.47830611
+
+Note that for sequence, index scan and table scan are the same thing.
+We need to have a row_copy/key_copy cost as this is used when doing
+a key lookup for sequence. Setting these to 50% of the full cost
+should be sufficient for now.
+
+Calculating sequence_scan_cost:
+
+Ignoring disk reads, the cost of a table scan is:
+rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)
+
+The cost of key scan is:
+ranges * KEY_LOOKUP_COST + (rows - ranges) * KEY_NEXT_FIND_COST +
+rows * KEY_COPY_COST;
+
+As there is no search after first key for sequence, we can set
+KEY_LOOKUP_COST = KEY_NEXT_FIND_COST.
+
+This gives us:
+
+r_table_time_ms = (ROW_NEXT_FIND_COST + ROW_COPY_COST) * 1000000 =
+                  (KEY_NEXT_FIND_COST + KEY_COPY_COST) * 1000000;
+
+->
+ROW_NEXT_FIND_COST= ROW_COPY_COST= KEY_NEXT_FIND_COST= KEY_COPY_COST=
+12.47830611/1000000/2 = 0.0000062391530550
+
+
+HEAP KEY LOOKUP
+===============
+
+We can use this code to find the timings of an index read in a table:
+
+analyze format=json select straight_join count(*) from seq_1_to_1000000,check_costs where seq=l_orderkey
+
+"query_block": {
+ "select_id": 1,
+ "r_loops": 1,
+ "r_total_time_ms": 420.5083447,
+ "table": {
+ "table_name": "seq_1_to_1000000",
+ "access_type": "index",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "8",
+ "used_key_parts": ["seq"],
+ "r_loops": 1,
+ "rows": 1000000,
+ "r_rows": 1000000,
+ "r_table_time_ms": 12.47830611,
+ "r_other_time_ms": 44.0671283,
+ "filtered": 100,
+ "r_filtered": 100,
+ "using_index": true
+ },
+ "table": {
+ "table_name": "check_costs",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["l_orderkey"],
+ "ref": ["test.seq_1_to_1000000.seq"],
+ "r_loops": 1000000,
+ "rows": 1,
+ "r_rows": 1,
+      "r_table_time_ms": 160,
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "seq_1_to_1000000.seq = check_costs.l_orderkey"
+ }
+ }
+
+This gives the time for a key lookup on a hash key as:
+160/1000000 - row_copy_cost =
+160/1000000.0 - 2.0042e-06 = 0.00015799580000000002
+
+
+ARIA TABLE SCAN
+===============
+(page format, all rows are cached)
+
+table_scan ms: 107.315698
+
+Cost is calculated as:
+
+blocks= stats.data_file_length / IO_SIZE = 122888192/4096= 30002
+engine_blocks (8192 is block size in Aria) = 15001
+
+cost= blocks * avg_io_cost() *
+ optimizer_cache_cost * SCAN_LOOKUP_COST +
+ engine_blocks * INDEX_BLOCK_COPY_COST +
+ TABLE_SCAN_SETUP_COST +
+ records * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
+
+When all is in memory (optimizer_cache_cost= 0) we get:
+
+cost= blocks * INDEX_BLOCK_COPY_COST +
+ TABLE_SCAN_SETUP_COST +
+ records * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
+
+To calculate INDEX_BLOCK_COPY_COST I added a temporary tracker in
+ma_pagecache.cc::pagecache_read() and ran the same query.
+I got the following data:
+{counter = 17755, sum = 1890559}
+Which gives me the time for copying a block as:
+1000.0*1890559/sys_timer_info.cycles.frequency/17755 = 3.558138826971332e-05 ms
+And thus INDEX_BLOCK_COPY_COST= 0.035600
+
+Replacing known constants (and ignoring TABLE_SCAN_SETUP_COST):
+cost= 107.315698 = 15001 * 3.56e-5 + 1000000 * aria_row_copy_costs;
+
+aria_row_copy_costs= (107.315698 - (15001 * 3.56e-5))/1000000 =
+0.0001067816624
+
+As ROW_COPY_COST is taken to be 0.57 of (ROW_COPY_COST + ROW_NEXT_FIND_COST)
+(see appendix):
+
+ROW_COPY_COST= 0.0001067816624 * 0.57 = 0.000060865547560
+ROW_NEXT_FIND_COST= 0.0001067816624 * 0.43 = 0.000045916114832
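+
+A Python sketch of the two steps above; the ~3 GHz cycle frequency is an
+assumption matching the test machine's CPU, the other numbers are the
+measured values from this section:
+
+  cycles, calls = 1_890_559, 17_755     # tracker data from pagecache_read()
+  cycle_frequency = 3.0e9               # assumed ~3 GHz clock
+  block_copy_ms = cycles / calls / cycle_frequency * 1000
+  # block_copy_ms ~ 3.55e-05 ms, i.e. INDEX_BLOCK_COPY_COST ~ 0.0356 usec
+
+  scan_ms, rows, engine_blocks = 107.315698, 1_000_000, 15_001
+  per_row = (scan_ms - engine_blocks * block_copy_ms) / rows
+  row_copy_cost = per_row * 0.57        # 0.57 share from the appendix
+  row_next_find_cost = per_row * 0.43
+  print(row_copy_cost, row_next_find_cost)  # ~6.09e-05 and ~4.59e-05 ms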
+
+
+Aria, INDEX SCAN
+================
+
+Finding out cost of reading X keys from an index (no row lookup) in Aria.
+
+Query: select count(*) from test.check_costs_aria force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0
+Table access time: 98.1427158 ms
+
+blocks= index_size/IO_SIZE =
+(rows * tot_key_length / INDEX_BLOCK_FILL_FACTOR) / IO_SIZE
+->
+1000000 * 19 / 0.75/ 4096 = 6184
+engine_blocks (block_size 8192) = 6184/2 = 3092
+(Range optimzer had calculated 3085)
+
+keyread_time= blocks * avg_io_cost() * cache + engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST);
+= engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)=
+ 3092 * 3.56e-05 + 1000000 * (KEY_NEXT_FIND_COST + KEY_COPY_COST)
+->
+KEY_NEXT_FIND_COST + KEY_COPY_COST= (98.1427158 - 3092 * 3.56e-05)/1000000 =
+0.0000980326406;
+
+KEY_COPY_COST= 0.0000980326406 * 0.16 = 0.000015685222496
+KEY_NEXT_FIND_COST= 0.0000980326406 * 0.84 = 0.000082347418104
+
+
+Aria, RANGE SCAN (scan index, fetch a row for each index entry)
+===============================================================
+
+Query:
+select sum(l_orderkey) from test.check_costs_aria force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0
+range_scan ms: 309.7620909
+
+cost= keyread_time + rnd_pos_time.
+keyread_time is as above in index scan, but without KEY_COPY_COST:
+keyread_time= 98.1427158 - KEY_COPY_COST * 1000000=
+98.1427158 - 0.000015685222496 * 1000000= 82.457493304000000;
+rnd_pos_time= 309.7620909 - 82.457493304000000 = 227.304597596000000
+
+rnd_pos_time() = io_cost + engine_mem_cost +
+ rows * (ROW_LOOKUP_COST + ROW_COPY_COST) =
+rows * avg_io_cost() * engine_block_size/IO_SIZE +
+rows * INDEX_BLOCK_COPY_COST +
+rows * (ROW_COPY_COST + ROW_LOOKUP_COST)
+= (When rows are in memory)
+rows * INDEX_BLOCK_COPY_COST +
+rows * (ROW_COPY_COST + ROW_LOOKUP_COST)
+
+This gives us:
+227.304597596000000 = 1000000 * 3.56e-05 + 1000000*(0.000060865547560 + ROW_LOOKUP_COST)
+->
+ROW_LOOKUP_COST= (227.304597596000000 - 1000000 * 3.56e-05 - 1000000*0.000060865547560) / 1000000 = 0.0001308390500
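+
+The same decomposition in Python (all constants are the values derived in
+the earlier Aria sections):
+
+  index_scan_ms, range_scan_ms, rows = 98.1427158, 309.7620909, 1_000_000
+  key_copy_cost = 1.5685222496e-05
+  index_block_copy_cost = 3.56e-05
+  row_copy_cost = 6.086554756e-05
+  keyread_ms = index_scan_ms - key_copy_cost * rows  # index part, no key copy
+  rnd_pos_ms = range_scan_ms - keyread_ms             # row fetch part
+  row_lookup_cost = rnd_pos_ms / rows - index_block_copy_cost - row_copy_cost
+  print(row_lookup_cost)                              # ~0.0001308 ms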
+
+
+Aria, EQ_REF with index_read
+============================
+
+select straight_join count(*) from seq_1_to_1000000,test.check_costs_aria where seq=l_linenumber
+eq_ref_index_join 499.631749 ms
+
+According to analyze statement:
+
+- Cost for SELECT * from seq_1_to_1000000: 12.57
+  (From Last_query_cost after the above costs have been applied)
+- Time from check_costs: eq_ref's: 499.631749 - 12.57 = 487.061749
+
+cost= rows * (keyread_time(1,1) + KEY_COPY_COST)
+
+keyread_time(1,1)= INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST;
+
+cost= rows * (KEY_COPY_COST + INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST)
+->
+KEY_LOOKUP_COST= cost/rows - 0.000015685222496 - 0.000035600
+KEY_LOOKUP_COST= 487.061749 / 1000000 - 0.000035600 - 0.000015685222496
+KEY_LOOKUP_COST= 0.000435776526504
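+
+A quick Python check of the same numbers:
+
+  eq_ref_ms, seq_ms, rows = 499.631749, 12.57, 1_000_000
+  key_copy_cost, index_block_copy_cost = 1.5685222496e-05, 3.56e-05
+  key_lookup_cost = ((eq_ref_ms - seq_ms) / rows
+                     - index_block_copy_cost - key_copy_cost)
+  print(key_lookup_cost)                # ~0.000435777 ms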
+
+
+MyISAM, TABLE SCAN
+==================
+
+select sum(l_partkey) from test.check_costs_myisam
+table_scan ms: 126.353364
+
+check_costs.MYD: 109199788 = 26660 IO_SIZE blocks
+The row format for MyISAM is similar to Aria, so we use the same
+ROW_COPY_COST as for Aria.
+
+cost= blocks * avg_io_cost() *
+ optimizer_cache_cost * SCAN_LOOKUP_COST +
+ engine_blocks * INDEX_BLOCK_COPY_COST +
+ TABLE_SCAN_SETUP_COST +
+ rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
+
+MyISAM is using the file system as a row cache.
+Let's put the cost of accessing the row in ROW_NEXT_FIND_COST.
+Everything is cached (by the file system) and optimizer_cache_cost= 0;
+
+cost= engine_blocks * INDEX_BLOCK_COPY_COST +
+ TABLE_SCAN_SETUP_COST +
+ rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST))
+
+ROW_NEXT_FIND_COST=
+(costs - engine_blocks * INDEX_BLOCK_COPY_COST - TABLE_SCAN_SETUP_COST)/rows -
+ROW_COPY_COST
+=
+(126.353364 - 26660 * 3.56e-05 - 1)/1000000 - 0.000060865547560
+ROW_NEXT_FIND_COST= 0.00006353872044
+
+
+MyISAM INDEX SCAN
+=================
+
+select count(*) from test.check_costs_myisam force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0;
+index_scan ms: 106.490584
+
+blocks= index_size/IO_SIZE =
+(rows * tot_key_length / INDEX_BLOCK_FILL_FACTOR) / IO_SIZE
+->
+1000000 * 19 / 0.75/ 4096 = 6184
+As MyISAM has a block size of 4096 for this table, engine_blocks= 6184
+
+cost= keyread_time= blocks * avg_io_cost() * cache + engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST);
+->
+cost= engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)
+
+Assuming INDEX_BLOCK_COPY_COST is the same as in Aria and the code for
+key_copy is identical to Aria's:
+cost= 6184 * 3.56e-05 + 1000000 * (KEY_NEXT_FIND_COST + KEY_COPY_COST)
+->
+KEY_NEXT_FIND_COST= (106.490584 - 6184 * 3.56e-05)/1000000 - 0.000015685222496=
+0.000090585211104
+
+
+MyISAM, RANGE SCAN (scan index, fetch a row for each index entry)
+=================================================================
+
+select sum(l_orderkey) from test.check_costs_myisam force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0
+time: 1202.0894 ms
+
+cost= keyread_time + rnd_pos_time.
+keyread_time is as above in MyISAM INDEX SCAN, but without KEY_COPY_COST:
+keyread_time= 106.490584 - KEY_COPY_COST * 1000000=
+106.490584 - 0.000015685222496 * 1000000= 90.805361504000000;
+rnd_pos_time= 1202.0894 - 90.805361504000000 = 1111.284038496000000
+
+rnd_pos_time() = io_cost + engine_mem_cost +
+ rows * (ROW_LOOKUP_COST + ROW_COPY_COST) =
+rows * avg_io_cost() * engine_block_size/IO_SIZE +
+rows * INDEX_BLOCK_COPY_COST +
+rows * (ROW_COPY_COST + ROW_LOOKUP_COST)
+= (When rows are in memory)
+rows * INDEX_BLOCK_COPY_COST +
+rows * (ROW_COPY_COST + ROW_LOOKUP_COST)
+
+This gives us:
+ 1111.284038496000000 = 1000000 * 3.56e-05 + 1000000*(0.000060865547560 + ROW_LOOKUP_COST)
+->
+ROW_LOOKUP_COST= ( 1111.284038496000000 - 1000000 * (3.56e-05 + 0.000060865547560)) / 1000000
+->
+ROW_LOOKUP_COST= 0.001014818490936
+
+As the row is never cached, we have to ensure that rnd_pos_time()
+doesn't include an io cost (which would be affected by
+optimizer_cache_hit_ratio). This is done by having a special
+ha_myisam::rnd_pos_time() that doesn't include io cost but instead an
+extra cpu cost.
+
+
+MyISAM, EQ_REF with index_read
+==============================
+
+select straight_join count(*) from seq_1_to_1000000,test.check_costs_myisam where seq=l_linenumber;
+eq_ref_join ms: 613.906777 of which 12.48 ms is for seq_1_to_1000000;
+
+According to analyze statement:
+
+- Cost for SELECT * from seq_1_to_1000000: 12.48 (See sequence_scan_cost)
+- Time from check_costs: eq_ref's: 613.906777- 12.48 = 601.426777;
+
+cost= rows * (keyread_time(1) + KEY_COPY_COST)
+
+keyread_time(1)= INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST;
+
+cost= rows * (KEY_COPY_COST + INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST)
+->
+KEY_LOOKUP_COST= cost/rows - INDEX_BLOCK_COPY_COST - KEY_COPY_COST;
+601.426777 / 1000000 - 3.56e-05 - 0.000015685222496 = 0.00055014155451
+KEY_LOOKUP_COST= 0.00055014155451
+
+
+
+InnoDB, TABLE SCAN
+==================
+
+select sum(l_quantity) from check_costs_innodb;
+table_scan 131.302492
+Note that InnoDB reported only 956356 rows instead of 1000000 in stats.records.
+This will cause the optimizer to calculate the costs based on wrong
+assumptions.
+
+As InnoDB has a clustered index (whose cost is a combination of
+KEY_LOOKUP_COST + ROW_COPY_COST), we have to ensure that the
+relationship between KEY_COPY_COST and ROW_COPY_COST is close to the
+real time of copying a key and a row.
+
+I assume, for now, that the row format for InnoDB is not that
+different from Aria's (in other words, the computation to unpack is
+about the same), so let's use the same ROW_COPY_COST (0.000060865547560).
+
+I am ignoring the fact that InnoDB can optimize row copying by only
+copying the used fields, as the optimizer currently has no way to take
+that into account. (This would require a way to update ROW_COPY_COST per
+table instance in the query.)
+
+For now, let's also use the same value as Aria for
+INDEX_BLOCK_COPY_COST (3.56e-05).
+
+The number of IO_SIZE blocks in the InnoDB data file is 34728 (from gdb).
+(For reference, MyISAM was using 26660 and Aria 30002 blocks)
+As InnoDB is using 16K blocks, the number of engine blocks= 34728/4= 8682
+
+cost= blocks * avg_io_cost() *
+ optimizer_cache_cost * SCAN_LOOKUP_COST +
+ engine_blocks * INDEX_BLOCK_COPY_COST +
+ TABLE_SCAN_SETUP_COST +
+ rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
+
+as optimizer_cache_cost = 0
+
+cost= engine_blocks * INDEX_BLOCK_COPY_COST +
+ TABLE_SCAN_SETUP_COST +
+ rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST))
+
+ROW_NEXT_FIND_COST=
+(costs - engine_blocks * INDEX_BLOCK_COPY_COST - TABLE_SCAN_SETUP_COST)/rows -
+ROW_COPY_COST
+= (Ignoring TABLE_SCAN_SETUP_COST, which is just 10 usec)
+(131.302492 - 8682 * 3.56e-05)/1000000 - 0.000060865547560 =
+0.00007012786523999997
+
+
+InnoDB INDEX SCAN
+=================
+
+select count(*) from check_costs_innodb force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0;
+index_scan 114.733037 ms
+Note that InnoDB is reporting 988768 rows instead of 1000000
+(The number varies a bit between runs. At another run I got 956356 rows)
+With default costs (as of above), we get a query cost of 112.142. This can
+still be improved a bit...
+
+blocks= index_size/IO_SIZE =
+(rows * tot_key_length / INDEX_BLOCK_FILL_FACTOR) / IO_SIZE
+-> (total_key_length is 17 in InnoDB, 19 in Aria)
+1000000 * 17 / 0.75/ 4096 = 5533
+engine_blocks= 5533/4 = 1383
+
+(In reality we get 5293 blocks and 1323 engine blocks, because of the
+difference in InnoDB row count)
+
+cost= keyread_time= blocks * avg_io_cost() * cache + engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST);
+->
+cost= engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)
+
+Assuming INDEX_BLOCK_COPY_COST is same as in Aria:
+(Should probably be a bit higher as block_size in InnoDB is 16384
+compared to 8192 in Aria)
+
+cost= 1383 * 3.56e-05 + 1000000 * (KEY_NEXT_FIND_COST + KEY_COPY_COST)
+=
+KEY_NEXT_FIND_COST + KEY_COPY_COST= (114.733037 - 1383 * 3.56e-05)/1000000
+=
+KEY_NEXT_FIND_COST= (114.733037 - 1383 * 3.56e-05)/1000000 - 0.000015685222496
+->
+KEY_NEXT_FIND_COST=0.000098998579704;
+
+Setting this makes InnoDB calculate the cost to 113.077711 (With estimate of
+988768 rows)
+If we had the right number of rows in ha_key_scan_time, we would
+have gotten a cost of:
+
+Last_query_cost: 145.077711 (Including WHERE cost for 988768 rows)
+(145.077711)/988768*1000000.0-32 = 114.72573444933
+
+
+InnoDB RANGE SCAN
+=================
+
+select sum(l_orderkey) from check_costs_innodb force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0
+range_scan 961.4857045 ms
+Note that InnoDB was reporting 495340 rows instead of 1000000 !
+I added a patch to fix this and now InnoDB reports 990144 rows
+
+cost= keyread_time + rnd_pos_time.
+keyread_time is as above in index scan, but we want it without KEY_COPY_COST:
+keyread_time= cost - KEY_COPY_COST * 1000000=
+114.733037 - 0.000015685222496 * 1000000= 99.047814504000000
+rnd_pos_time= 961.4857045 - 99.047814504000000 = 862.437889996000000
+
+rnd_pos_time() = io_cost + engine_mem_cost +
+ rows * (ROW_LOOKUP_COST + ROW_COPY_COST) =
+rows * avg_io_cost() * engine_block_size/IO_SIZE +
+rows * INDEX_BLOCK_COPY_COST +
+rows * (ROW_COPY_COST + ROW_LOOKUP_COST)
+= (When rows are in memory)
+
+rows * (INDEX_BLOCK_COPY_COST + ROW_COPY_COST + ROW_LOOKUP_COST)
+
+This gives us:
+862.437889996000000 = 1000000 * 3.56e-05 + 1000000*(0.000060865547560 + ROW_LOOKUP_COST)
+->
+ROW_LOOKUP_COST= (862.437889996000000 - 1000000*(3.56e-05+0.000060865547560)) / 1000000
+->
+ROW_LOOKUP_COST= 0.000765972342436
+
+Setting this makes InnoDB calculate the cost to 961.081050 (good enough)
+
+
+InnoDB EQ_REF with index_read
+=============================
+
+select straight_join count(*) from seq_1_to_1000000,test.check_costs_innodb where seq=l_linenumber
+time: 854.980610 ms
+
+Here the engine first has to do a key lookup and copy the key to the upper
+level (Index only read).
+
+According to analyze statement:
+
+- Cost for SELECT * from seq_1_to_1000000: 12.57 (See sequence_scan_cost)
+- Time from check_costs: eq_ref_join: 854.980610
+  This is the time for accessing both seq_1_to_1000000 and check_costs_innodb.
+  Time for check_costs_innodb: 854.980610 - 12.57 = 842.410610 ms
+
+cost= rows * (keyread_time(1,1) + KEY_COPY_COST)
+
+keyread_time(1,1)= INDEX_BLOCK_COPY_COST + ranges * KEY_LOOKUP_COST +
+ (rows-ranges) * KEY_NEXT_FIND_COST
+
+As rows=1 and ranges=1:
+
+keyread_time(1,1)= INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST
+
+cost= rows * (KEY_COPY_COST + INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST)
+->
+KEY_LOOKUP_COST= cost/rows - INDEX_BLOCK_COPY_COST - KEY_COPY_COST;
+842.410610 / 1000000 - 3.56e-05 - 0.000015685222496
+->
+KEY_LOOKUP_COST= 0.000791125387504;
+
+After the above we have
+last_query_cost=918.986438;
+
+The cost for check_costs_innodb =
+last_query_cost - sequence_scan_cost - where_cost*2 =
+918.986438 - 12.57 - 32*2 = 842.416438 (ok)
+
+
+InnoDB EQ_REF with clustered index read
+=======================================
+
+select straight_join count(*) from seq_1_to_1000000,check_costs_innodb where seq=l_orderkey
+eq_ref_cluster_join time: 972.290773 ms
+
+According to analyze statement:
+- Cost for SELECT * from seq_1_to_1000000: 12.57 (See sequence_scan_cost)
+- Time from check_costs: eq_ref_cluster_join: 972.290773 ms
+ This is time for accessing both seq_1_to_1000000 and check_costs_innodb.
+  Time for check_costs_innodb: 972.290773 - 12.57 = 959.790773
+
+The estimated cost is 875.0160
+
+cost= rows * (keyread_time(1,1) +
+ ranges * ROW_LOOKUP_COST +
+ (rows - ranges) * ROW_NEXT_FIND_COST +
+ rows * ROW_COPY_COST)
+
+As rows=1 and ranges=1:
+
+cost= rows * (INDEX_BLOCK_COPY_COST + ROW_LOOKUP_COST + ROW_COPY_COST);
+->
+ROW_LOOKUP_COST= cost/rows - INDEX_BLOCK_COPY_COST - ROW_COPY_COST;
+959.790773 / 1000000 - 3.56e-05 - 0.000060865547560
+->
+ROW_LOOKUP_COST= 0.0008633252254400001
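+
+The same arithmetic in Python, using the Aria-derived copy costs assumed
+above:
+
+  eq_ref_cluster_ms, seq_ms, rows = 972.290773, 12.57, 1_000_000
+  index_block_copy_cost, row_copy_cost = 3.56e-05, 6.086554756e-05
+  row_lookup_cost = ((eq_ref_cluster_ms - seq_ms) / rows
+                     - index_block_copy_cost - row_copy_cost)
+  print(row_lookup_cost)                # ~0.000863325 ms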
+
+From InnoDB RANGE SCAN we have ROW_LOOKUP_COST=0.000765972342436
+From EQ_REF with index read we have KEY_LOOKUP_COST= 0.000791125387504,
+which should in theory be identical to ROW_LOOKUP_COST.
+
+For now we have to live with the difference (as I want to have the project done
+for the next release).
+
+The difference could come from the following things:
+
+- InnoDB estimation of rows in the range scan test is a bit off.
+- Maybe the work to find a row from an internal key entry compared to
+  an external key is a bit different (less checking/conversions).
+- There are different keys used for the range scan and this test, which
+  could have different costs.
+- Maybe we should increase ROW_COPY_COST or ROW_LOOKUP_COST for InnoDB
+ and adjust other costs.
+
+
+Some background. In range scan, the cost is:
+- Scanning over all keys
+ - For each key, fetch row using rowid
+
+For the EQ_REF case:
+- Scan seq_1_to_1000000
+  for each value in seq
+    do an index_read() call
+
+
+Archive scan cost
+=================
+
+table_scan time: 757.390280 ms
+rows: 1000000
+file size: 32260650 = 7878 IO_SIZE blocks
+
+cost= scan_time() + TABLE_SCAN_SETUP_COST +
+ records * (ROW_COPY_COST + ROW_LOOKUP_COST + WHERE_COMPARE_COST);
+
+757.390280 = scan_time() + 10 + 1000000 * (0.060866+0.032000)/1000
+->
+scan_time()= 757.390280 - (10 + 1000000 * (0.060866+0.032000)/1000) = 654.52428
+
+scan_time() is defined as:
+
+cost.cpu= (blocks * DISK_READ_COST * DISK_READ_RATIO +
+ blocks * ARCHIVE_DECOMPRESS_TIME);
+
+Default values for above:
+blocks= 7878
+DISK_READ_COST: 10.240000 usec
+DISK_READ_RATIO= 0.20
+->
+ARCHIVE_DECOMPRESS_TIME= (654.52428 - (7878 * 10.240000/1000*0.2)) / 7878 =
+0.081034543792841
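+
+The same calculation in Python (the setup constant and per-row costs are the
+ones used above):
+
+  table_scan_ms, rows, blocks = 757.390280, 1_000_000, 7_878
+  per_row_ms = (0.060866 + 0.032000) / 1000  # row copy+lookup and WHERE, usec -> ms
+  scan_time_ms = table_scan_ms - 10 - rows * per_row_ms
+  disk_read_ms = blocks * 10.240000 / 1000 * 0.2
+  decompress_ms = (scan_time_ms - disk_read_ms) / blocks
+  print(scan_time_ms, decompress_ms)    # ~654.52 ms total, ~0.0810 ms per block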
+
+
+Future improvements
+===================
+
+The current costs are quite good for tables of 1M rows (usually about
+10% from the true cost for the test table).
+
+For smaller tables the costs will be a bit on the high side and for
+bigger tables a bit on the low side for eq_ref joins (both with index
+and with row lookup).
+
+The only engine that takes into account the number of rows for key lookups
+is heap with binary-tree indexes.
+
+Ideas of how to fix this:
+
+- Change KEY_LOOKUP_COST, INDEX_BLOCK_COPY_COST and ROW_LOOKUP_COST
+  (for clustered index) to take into account the height of the B-tree.
+
+
+Appendix
+========
+
+Observations
+============
+
+Ratio between table scan and range scan
+
+Queries used:
+select sum(l_quantity) from check_costs_aria;
+select sum(l_orderkey) from test.check_costs_aria force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0;
+
+The test for Aria shows that cost ratio of range_scan/table_scan are:
+disk_read_ratio=0 341.745207/139.348286= 2.4524536097
+disk_read_ratio=0.02 752.408528/145.748695= 5.1623688843
+disk_read_ratio=0.20 4448.378423/203.352382= 21.8752216190
+
+As we are using disk_read_ratio=0.02 by default, this means that for
+mtr not to use a table scan instead of a range scan, we have to ensure
+that the range does not cover more than 1/5 of the total rows.
+
+
+Trying to understand KEY_COPY_COST
+==================================
+
+An index scan with 2 and 4 key parts on an Aria table.
+The index has null key parts, so packed keys are used.
+
+Query1 "index_scan" (2 integer key parts, both key parts may have NULLs):
+select count(*) from $table force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0
+
+- Optimized build: Average 164 ms/query
+- gprof build: Average 465 ms/query
+
+[16] 51.2 0.00 0.21 3999987 handler::ha_index_next()
+[15] 51.2 0.01 0.20 3999993 maria_rnext [15]
+[22] 19.5 0.08 0.00 9658527 _ma_get_pack_key [22]
+
+This means that for 3999987 read next calls, the time of _ma_get_pack_key
+to retrieve the returned key is:
+0.08 * (3999987/9658527)
+
+The relation of KEY_COPY_COST to KEY_NEXT_FIND_COST is thus for Aria:
+
+0.08 * (3999987/9658527)/0.21 = 0.15777 parts of KEY_NEXT_FIND_COST
+
+------
+
+Query 2 "index_scan_4_parts" (4 integer key parts, 2 parts may have NULLs):
+select count(*) from $table force index (long_suppkey) where l_linenumber >= 0 and l_extra >0
+
+- Optimized build: 218 ms
+- gprof build: Average 497 ms/query
+
+Most costly functions
+ % cumulative self self total
+ time seconds seconds calls ms/call ms/call name
+ 13.44 0.61 0.61 48292742 0.00 0.00 _ma_get_pack_key
+ 8.59 1.00 0.39 28298101 0.00 0.00 ha_key_cmp
+ 7.27 1.33 0.33 19999951 0.00 0.00 _ma_put_key_in_record
+ 4.41 1.96 0.20 19999952 0.00 0.00 handler::ha_index_next(unsigned char*)
+
+Call graph
+[13] 9.0 0.20 0.21 19999952 handler::ha_index_next(unsigned char*) [13]
+
+[3] 21.6 0.16 0.82 19999960 _ma_search_next [3]
+[18] 7.7 0.02 0.33 19999951 _ma_read_key_record [18]
+ 0.00 0.00 19887291/19999952 _ma_get_static_key [6565][19]
+ 18.4 0.10 0.64 19999936 Item_cond_and::val_int() [19]
+
+-> KEY_COPY_COST = 1.33/1.96 = 0.6785 parts of the index_read_next
+
+Total cost increase from 2 -> 4 key parts = 1.96 / 1.40 = 40%
+This includes the additional work in having more key pages, more work in
+finding the next key (if key parts are packed or possibly null), and copying
+the key parts to the record.
+
+I also did a quick analysis of using NOT NULL keys, in which case
+Aria can use fixed key lengths. This gives a 39.4% speedup on index
+scan, a small speedup to table scan (as 2 fields cannot be null),
+but no notable speedup for anything else.
+
+
+Trying to understand ROW_COPY_COST
+==================================
+
+A simple table scan on an Aria table.
+
+query: select sum(l_quantity) from check_costs_aria
+
+From gprof running the above query 10 times with 1M rows in the table:
+
+[14] 83.7 0.03 0.76 9999989 handler::ha_rnd_next()
+[17] 51.6 0.49 0.00 10000010 _ma_read_block_record2 [17]
+[18] 21.1 0.01 0.19 156359 pagecache_read [18]
+
+The function that unpacks the row is _ma_read_block_record2()
+
+Taking into account that all pages are cached:
+(Note that the main cost in pagecache_read in this test is calculating the page
+checksum)
+
+ROW_COPY_COST/ROW_NEXT_FIND_COST= 0.49/(0.76+0.3-0.20) = 0.56977 = 0.57
+
+
+Reason for SCAN_SETUP_COSTS
+===========================
+
+One problem with the new more exact cost model is that the optimizer
+starts to use table scans much more for small tables (which is correct when
+one looks at cost). However, small tables are usually cached fully so
+it is still better to use index scan in many cases.
+
+This problem is especially notable in mtr where most test cases use
+tables with very few rows.
+
+TABLE_SCAN_SETUP_COST is used to add a constant startup cost for
+table and index scans. It is by default set to 10 usec, about 10 MyISAM
+row reads.
+
+The following cost calculation shows why this is needed:
+
+explain select count(*) from t1, t2 where t1.p = t2.i
++------+-------------+-------+-------+---------------+---------+---------+-----------+------+-------------+
+| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
++------+-------------+-------+-------+---------------+---------+---------+-----------+------+-------------+
+| 1 | SIMPLE | t1 | index | PRIMARY | PRIMARY | 4 | NULL | 2 | Using index |
+| 1 | SIMPLE | t2 | ref | k1 | k1 | 5 | test.t1.p | 2 | Using index |
++------+-------------+-------+-------+---------------+---------+---------+-----------+------+-------------+
+
+t1 has 2 rows
+t2 has 4 rows
+
+Optimizer trace shows when using TABLE_SCAN_SETUP_COST=0:
+
+index scan costs
+"read_cost": 0.00308962,
+read_and_compare_cost": 0.00321762
+
+key read costs:
+"rows": 2,
+"cost": 0.00567934
+
+CHOSEN:
+Scan with join cache: cost": 0.0038774
+rows_after_scan": 2
+
+Note that in the following, we are using cost in microseconds while
+the above costs are in milliseconds.
+
+select * from information_schema.optimizer_costs where engine="myisam"\G
+ ENGINE: MyISAM
+ OPTIMIZER_DISK_READ_COST: 10.240000
+ OPTIMIZER_INDEX_BLOCK_COPY_COST: 0.035600
+ OPTIMIZER_KEY_COMPARE_COST: 0.008000
+ OPTIMIZER_KEY_COPY_COST: 0.066660
+ OPTIMIZER_KEY_LOOKUP_COST: 0.498540
+ OPTIMIZER_KEY_NEXT_FIND_COST: 0.060210
+ OPTIMIZER_DISK_READ_RATIO: 0.200000
+OPTIMIZER_RND_POS_INTERFACE_COST: 0.000000
+ OPTIMIZER_ROW_COPY_COST: 0.088630
+ OPTIMIZER_ROW_LOOKUP_COST: 0.641150
+ OPTIMIZER_ROW_NEXT_FIND_COST: 0.049510
+ OPTIMIZER_ROWID_COMPARE_COST: 0.004000
+@@OPTIMIZER_SCAN_SETUP_COST 10.000000
+@@OPTIMIZER_WHERE_COST 0.032000
+
+Checking the calculated costs:
+
+index_scan_cost= 10.240000 * 0.2 + 0.035600 + 0.498540 + 4 * (0.060210+0.066660) = 3.08962
+where_cost 0.032000*4= 0.128000
+total: 3.21762
+
+key_read_cost= 10.240000 * 0.2 + 0.035600 + 0.498540 + 0.060210 = 2.64235
+key_copy_cost= 0.066660 * 2 = 0.13332
+where_cost 0.032000*2= 0.06400
+total: 2.64235 + 0.13332 + 0.06400 = 2.8396699999999999
+Needs to be done 2 times (2 rows in t1): 5.67934
+
+Join cache only needs 1 refill. The calculation is done in
+sql_select.cc:best_access_path()
+
+scan_with_join_cache=
+scan_time + cached_combinations * ROW_COPY_COST * JOIN_CACHE_COST +
+row_combinations * (ROW_COPY_COST * JOIN_CACHE_COST + WHERE_COST) =
+3.2176 + 2 * 0.088630 + 2*2 * (0.088630 * 1 + 0.032000) =
+3.87738
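+
+The three costs above can be re-derived with a few lines of Python from the
+optimizer_costs values listed earlier (all values in microseconds):
+
+  disk_read, blk_copy, key_lookup = 10.24, 0.0356, 0.49854
+  key_next, key_copy, row_copy, where = 0.06021, 0.06666, 0.08863, 0.032
+  index_scan = disk_read * 0.2 + blk_copy + key_lookup + 4 * (key_next + key_copy)
+  read_and_compare = index_scan + 4 * where                 # ~3.2176
+  key_read = 2 * (disk_read * 0.2 + blk_copy + key_lookup + key_next
+                  + 2 * key_copy + 2 * where)               # ~5.6793
+  scan_with_join_cache = read_and_compare + 2 * row_copy + 4 * (row_copy + where)
+  print(read_and_compare, key_read, scan_with_join_cache)   # last one ~3.8774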
+
+Other observations:
+OPTIMIZER_KEY_NEXT_FIND_COST + OPTIMIZER_KEY_COPY_COST + OPTIMIZER_WHERE_COST=
+0.060210 + 0.066660 + 0.032000 = 0.158870
+OPTIMIZER_KEY_LOOKUP_COST / 0.158870 = 3.138
+
+This means that when using index only reads (and DISK_READ_RATIO=0)
+the optimizer will prefer to use 3 times more keys in range or ref
+than doing key lookups!
+If DISK_READ_RATIO is higher, the above ratio increases. This is one of
+the reasons why we set the default value for DISK_READ_RATIO quite low
+(0.02 now)
+
+(OPTIMIZER_ROW_COPY_COST + OPTIMIZER_ROW_NEXT_FIND_COST) /
+(OPTIMIZER_KEY_COPY_COST + OPTIMIZER_KEY_NEXT_FIND_COST) =
+(0.088630 + 0.049510) / (0.066660 + 0.060210) = 1.08831
+Which means that table scans and index scans have almost the same cost.
+
+
+HEAP_TEMPTABLE_CREATE_COST
+==========================
+
+I added trackers in create_tmp_table() and open_tmp_table() and ran a
+simple query that creates two materialized temporary tables with a unique
+index 31 times. I got the following tracking information:
+
+(gdb) p open_tracker
+$1 = {counter = 31, cycles = 302422}
+(gdb) p create_tracker
+$2 = {counter = 31, cycles = 1479836}
+
+Cycles per create = (302422 + 1479836)/31= 57492
+
+1000.0*57492/sys_timer_info.cycles.frequency = 0.0249 ms
+HEAP_TMPTABLE_CREATE_COST= 0.025 ms
+
+
+MySQL cost structures
+=====================
+
+MySQL 8.0 server costs are stored in the class Server_cost_constants, defined
+in opt_costconstants.h.
+
+It contains the following slots and has the following default values:
+
+m_row_evaluate_cost 0.1 Cost for evaluating the query condition on
+ a row
+m_key_compare_cost 0.05 Cost for comparing two keys
+m_memory_temptable_create_cost 1.0 Cost for creating an internal temporary
+ table in memory
+m_memory_temptable_row_cost 0.1 Cost for retrieving or storing a row in an
+ internal temporary table stored in memory.
+m_disk_temptable_create_cost 20.0 Cost for creating an internal temporary
+ table in a disk resident storage engine.
+m_disk_temptable_row_cost 0.5 Cost for retrieving or storing a row in an
+ internal disk resident temporary table.
+
+Engine cost variables:
+m_memory_block_read_cost 0.25 The cost of reading a block from a main
+ memory buffer pool
+m_io_block_read_cost 1.0 The cost of reading a block from an
+ IO device (disk)
+
+-------
+
+Some cost functions:
+
+scan_time() = data_file_length / IO_SIZE + 2;
+read_time(index, ranges, rows)= rows2double(ranges + rows);
+index_only_read_time()= records / keys_per_block
+
+table_scan_cost()= scan_time() * page_read_cost(1.0);
+
+index_scan_cost()= index_only_read_time(index, rows) *
+ page_read_cost_index(index, 1.0);
+read_cost()= read_time() * page_read_cost(1.0);
+
+
+page_read_cost()= buffer_block_read_cost(pages_in_mem) +
+ io_block_read_cost(pages_on_disk);
+
+io_block_read_cost()= blocks * m_io_block_read_cost
+buffer_block_read_cost()= blocks * m_memory_block_read_cost;
+
+
+There are also:
+table_in_memory_estimate()
+index_in_memory_estimate()
+
+If the storage engine is not providing estimates for the above, then
+the estimates are done based on table size (not depending on how many
+rows are going to be accessed in the table).
diff --git a/client/mysql.cc b/client/mysql.cc
index 0e704f70ecb..015b977e1c3 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -46,7 +46,7 @@
#include <locale.h>
#endif
-const char *VER= "15.1";
+const char *VER= "15.2";
/* Don't try to make a nice table if the data is too big */
#define MAX_COLUMN_LENGTH 1024
@@ -246,7 +246,7 @@ static my_bool ignore_errors=0,wait_flag=0,quick=0,
tty_password= 0, opt_nobeep=0, opt_reconnect=1,
opt_secure_auth= 0,
default_pager_set= 0, opt_sigint_ignore= 0,
- auto_vertical_output= 0,
+ auto_vertical_output= 0, show_query_cost= 0,
show_warnings= 0, executing_query= 0,
ignore_spaces= 0, opt_binhex= 0, opt_progress_reports;
static my_bool debug_info_flag, debug_check_flag, batch_abort_on_error;
@@ -324,6 +324,7 @@ static int com_quit(String *str,char*),
com_notee(String *str, char*), com_charset(String *str,char*),
com_prompt(String *str, char*), com_delimiter(String *str, char*),
com_warnings(String *str, char*), com_nowarnings(String *str, char*);
+static int com_query_cost(String *str, char*);
#ifdef USE_POPEN
static int com_nopager(String *str, char*), com_pager(String *str, char*),
@@ -395,6 +396,8 @@ static COMMANDS commands[] = {
{ "print", 'p', com_print, 0, "Print current command." },
{ "prompt", 'R', com_prompt, 1, "Change your mysql prompt."},
{ "quit", 'q', com_quit, 0, "Quit mysql." },
+ { "costs", 'Q', com_query_cost, 0,
+ "Toggle showing query costs after each query" },
{ "rehash", '#', com_rehash, 0, "Rebuild completion hash." },
{ "source", '.', com_source, 1,
"Execute an SQL script file. Takes a file name as an argument."},
@@ -1156,6 +1159,7 @@ static void print_table_data_xml(MYSQL_RES *result);
static void print_tab_data(MYSQL_RES *result);
static void print_table_data_vertically(MYSQL_RES *result);
static void print_warnings(void);
+static void print_last_query_cost(void);
static void end_timer(ulonglong start_time, char *buff);
static void nice_time(double sec,char *buff,bool part_second);
extern "C" sig_handler mysql_end(int sig) __attribute__ ((noreturn));
@@ -1816,6 +1820,10 @@ static struct my_option my_long_options[] =
{"show-warnings", OPT_SHOW_WARNINGS, "Show warnings after every statement.",
&show_warnings, &show_warnings, 0, GET_BOOL, NO_ARG,
0, 0, 0, 0, 0, 0},
+ {"show-query-costs", OPT_SHOW_WARNINGS,
+ "Show query cost after every statement.",
+ &show_query_cost, &show_query_cost, 0, GET_BOOL, NO_ARG,
+ 0, 0, 0, 0, 0, 0},
{"plugin_dir", OPT_PLUGIN_DIR, "Directory for client-side plugins.",
&opt_plugin_dir, &opt_plugin_dir, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
@@ -3574,6 +3582,8 @@ end:
/* Show warnings if any or error occurred */
if (show_warnings == 1 && (warnings >= 1 || error))
print_warnings();
+ if (show_query_cost)
+ print_last_query_cost();
if (!error && !status.batch &&
(mysql.server_status & SERVER_STATUS_DB_DROPPED))
@@ -4178,6 +4188,30 @@ end:
}
+/* print_last_query_cost */
+
+static void print_last_query_cost()
+{
+ const char *query;
+ char *end;
+ MYSQL_RES *result;
+ MYSQL_ROW cur;
+
+ query= "show status like 'last_query_cost'";
+ mysql_real_query_for_lazy(query, strlen(query));
+ mysql_store_result_for_lazy(&result);
+ if (!result)
+ goto end;
+
+ cur= mysql_fetch_row(result);
+ if (strtod(cur[1], &end) != 0.0)
+ tee_fprintf(PAGER, "%s: %s\n\n", cur[0], cur[1]);
+
+end:
+ mysql_free_result(result);
+}
+
+
static const char *array_value(const char **array, char key)
{
for (; *array; array+= 2)
@@ -4753,6 +4787,18 @@ com_nowarnings(String *buffer __attribute__((unused)),
return 0;
}
+static int
+com_query_cost(String *buffer __attribute__((unused)),
+ char *line __attribute__((unused)))
+{
+ show_query_cost= 1 - show_query_cost;
+ if (show_query_cost)
+ put_info("Last_query_cost enabled.",INFO_INFO);
+ else
+ put_info("Last_query_cost disabled.",INFO_INFO);
+ return 0;
+}
+
/*
Gets argument from a command on the command line. If mode is not GET_NEXT,
skips the command and returns the first argument. The line is modified by
@@ -5008,6 +5054,10 @@ com_status(String *buffer __attribute__((unused)),
ulonglong id;
MYSQL_RES *UNINIT_VAR(result);
+ /*
+ Don't remove "limit 1",
+ it is protection against SQL_SELECT_LIMIT=0
+ */
if (mysql_real_query_for_lazy(
C_STRING_WITH_LEN("select DATABASE(), USER() limit 1")))
return 0;
@@ -5015,10 +5065,6 @@ com_status(String *buffer __attribute__((unused)),
tee_puts("--------------", stdout);
usage(1); /* Print version */
tee_fprintf(stdout, "\nConnection id:\t\t%lu\n",mysql_thread_id(&mysql));
- /*
- Don't remove "limit 1",
- it is protection against SQL_SELECT_LIMIT=0
- */
if (!mysql_store_result_for_lazy(&result))
{
MYSQL_ROW cur=mysql_fetch_row(result);
diff --git a/include/my_getopt.h b/include/my_getopt.h
index ffff706e015..b57ac19f294 100644
--- a/include/my_getopt.h
+++ b/include/my_getopt.h
@@ -40,6 +40,7 @@ C_MODE_START
#define GET_FLAGSET 15
#define GET_BIT 16
+#define GET_ADJUST_VALUE 256
#define GET_ASK_ADDR 128
#define GET_AUTO 64
#define GET_TYPE_MASK 63
@@ -100,6 +101,7 @@ typedef my_bool (*my_get_one_option)(const struct my_option *, const char *, con
typedef void *(*my_getopt_value)(const char *, uint, const struct my_option *,
int *);
+typedef void (*my_getopt_adjust)(const struct my_option *, void *);
extern char *disabled_my_option;
extern char *autoset_my_option;
@@ -109,6 +111,7 @@ extern my_bool my_getopt_prefix_matching;
extern my_bool my_handle_options_init_variables;
extern my_error_reporter my_getopt_error_reporter;
extern my_getopt_value my_getopt_get_addr;
+extern my_getopt_adjust my_getopt_adjust_value;
extern int handle_options (int *argc, char ***argv,
const struct my_option *longopts, my_get_one_option)
diff --git a/include/my_global.h b/include/my_global.h
index a849597f468..9b74824255f 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -679,6 +679,7 @@ typedef SOCKET_SIZE_TYPE size_socket;
Io buffer size; Must be a power of 2 and a multiple of 512. May be
smaller what the disk page size. This influences the speed of the
isam btree library. eg to big to slow.
+ 4096 is a common block size on SSDs.
*/
#define IO_SIZE 4096U
/*
diff --git a/include/my_tracker.h b/include/my_tracker.h
new file mode 100644
index 00000000000..88cefe5ef5d
--- /dev/null
+++ b/include/my_tracker.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2022, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+ Trivial framework to add a tracker to a C function
+*/
+
+#include "my_rdtsc.h"
+
+struct my_time_tracker
+{
+ ulonglong counter;
+ ulonglong cycles;
+};
+
+#ifdef HAVE_TIME_TRACKING
+#define START_TRACKING ulonglong my_start_time= my_timer_cycles()
+#define END_TRACKING(var) \
+ { \
+ ulonglong my_end_time= my_timer_cycles(); \
+ (var)->counter++; \
+ (var)->cycles+= (unlikely(my_end_time < my_start_time) ? \
+ my_end_time - my_start_time + ULONGLONG_MAX : \
+ my_end_time - my_start_time); \
+ }
+#else
+#define START_TRACKING
+#define END_TRACKING(var) do { } while(0)
+#endif
diff --git a/include/myisam.h b/include/myisam.h
index 0942584e874..ad86903bc07 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -435,6 +435,8 @@ int thr_write_keys(MI_SORT_PARAM *sort_param);
int sort_write_record(MI_SORT_PARAM *sort_param);
int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulonglong);
my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows);
+struct OPTIMIZER_COSTS;
+void myisam_update_optimizer_costs(struct OPTIMIZER_COSTS *costs);
#ifdef __cplusplus
}
diff --git a/mysql-test/include/analyze-format.inc b/mysql-test/include/analyze-format.inc
index 7d1c48f3e6f..f9579555d7b 100644
--- a/mysql-test/include/analyze-format.inc
+++ b/mysql-test/include/analyze-format.inc
@@ -1,3 +1,3 @@
# The time on ANALYSE FORMAT=JSON is rather variable
---replace_regex /("(r_total_time_ms|r_table_time_ms|r_other_time_ms|r_buffer_size|r_filling_time_ms|r_query_time_in_progress_ms)": )[^, \n]*/\1"REPLACED"/
+--replace_regex /("(r_total_time_ms|r_table_time_ms|r_other_time_ms|r_buffer_size|r_filling_time_ms|r_query_time_in_progress_ms|r_unpack_time_ms)": )[^, \n]*/\1"REPLACED"/
diff --git a/mysql-test/main/analyze_format_json.result b/mysql-test/main/analyze_format_json.result
index 9a756782f96..7fd783e4355 100644
--- a/mysql-test/main/analyze_format_json.result
+++ b/mysql-test/main/analyze_format_json.result
@@ -183,7 +183,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "1Kb",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -233,7 +234,8 @@ ANALYZE
"buffer_size": "1Kb",
"join_type": "BNL",
"attached_condition": "tbl1.c > tbl2.c",
- "r_filtered": 15.83333333
+ "r_filtered": 15.83333333,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -769,13 +771,14 @@ ANALYZE
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 0,
- "attached_condition": "<in_optimizer>(t2.b,t2.b in (subquery#2))"
+ "attached_condition": "<in_optimizer>(t2.b,<exists>(subquery#2))"
},
"buffer_type": "flat",
"buffer_size": "65",
"join_type": "BNL",
- "attached_condition": "<in_optimizer>(t2.b,t2.b in (subquery#2))",
- "r_filtered": null
+ "attached_condition": "<in_optimizer>(t2.b,<exists>(subquery#2))",
+ "r_filtered": null,
+ "r_unpack_time_ms": "REPLACED"
}
}
],
@@ -783,20 +786,21 @@ ANALYZE
{
"query_block": {
"select_id": 2,
- "r_loops": 1,
+ "r_loops": 2,
"r_total_time_ms": "REPLACED",
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "ALL",
- "r_loops": 1,
+ "r_loops": 2,
"rows": 2,
"r_rows": 2,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
- "r_filtered": 100
+ "r_filtered": 0,
+ "attached_condition": "4 = t1.a"
}
}
]
@@ -878,7 +882,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "1",
"join_type": "BNL",
- "r_filtered": null
+ "r_filtered": null,
+ "r_unpack_time_ms": "REPLACED"
}
}
],
@@ -916,7 +921,8 @@ ANALYZE
"buffer_size": "65",
"join_type": "BNL",
"attached_condition": "t2.f2 = t3.f3",
- "r_filtered": null
+ "r_filtered": null,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
diff --git a/mysql-test/main/analyze_stmt_orderby.result b/mysql-test/main/analyze_stmt_orderby.result
index 76bc4d964b8..e188f93c160 100644
--- a/mysql-test/main/analyze_stmt_orderby.result
+++ b/mysql-test/main/analyze_stmt_orderby.result
@@ -494,7 +494,8 @@ ANALYZE
"buffer_size": "65",
"join_type": "BNL",
"attached_condition": "t3.a = t0.a",
- "r_filtered": 10
+ "r_filtered": 10,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -578,7 +579,8 @@ ANALYZE
"buffer_size": "119",
"join_type": "BNL",
"attached_condition": "t5.a = t6.a",
- "r_filtered": 21.42857143
+ "r_filtered": 21.42857143,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
diff --git a/mysql-test/main/ctype_collate.result b/mysql-test/main/ctype_collate.result
index 1ae9f295042..29d27fd608b 100644
--- a/mysql-test/main/ctype_collate.result
+++ b/mysql-test/main/ctype_collate.result
@@ -748,7 +748,7 @@ hex(b)
explain
select hex(b) from t1 where b<'zzz' order by b;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL PRIMARY NULL NULL NULL 4 Using where; Using filesort
+1 SIMPLE t1 range PRIMARY PRIMARY 34 NULL 4 Using where; Using filesort
select hex(b) from t1 where b<'zzz' order by b;
hex(b)
00
diff --git a/mysql-test/main/delete.result b/mysql-test/main/delete.result
index ed3683d52f9..77a1f8c9813 100644
--- a/mysql-test/main/delete.result
+++ b/mysql-test/main/delete.result
@@ -128,7 +128,6 @@ a b
delete ignore t11.*, t12.* from t11,t12 where t11.a = t12.a and t11.b <> (select b from t2 where t11.a < t2.a);
Warnings:
Warning 1242 Subquery returns more than 1 row
-Warning 1242 Subquery returns more than 1 row
select * from t11;
a b
0 10
diff --git a/mysql-test/main/except.result b/mysql-test/main/except.result
index d83623370d5..4d23ca772b3 100644
--- a/mysql-test/main/except.result
+++ b/mysql-test/main/except.result
@@ -387,7 +387,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -429,7 +430,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -505,7 +507,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -547,7 +550,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
diff --git a/mysql-test/main/except_all.result b/mysql-test/main/except_all.result
index df19abda077..e62062d0427 100644
--- a/mysql-test/main/except_all.result
+++ b/mysql-test/main/except_all.result
@@ -514,7 +514,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -556,7 +557,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -631,7 +633,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -673,7 +676,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "119",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
diff --git a/mysql-test/main/fetch_first.result b/mysql-test/main/fetch_first.result
index e36f2db4100..c277362739a 100644
--- a/mysql-test/main/fetch_first.result
+++ b/mysql-test/main/fetch_first.result
@@ -843,6 +843,7 @@ fetch first 2 rows with ties;
first_name last_name
Alice Fowler
Bob Trasc
+Silvia Ganush
#
# Test CTE support.
#
@@ -858,7 +859,7 @@ select * from temp_table
order by first_name, last_name;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 Using filesort
-2 DERIVED t1 range t1_name t1_name 103 NULL 3 Using where; Using index
+2 DERIVED t1 range t1_name t1_name 206 NULL 3 Using where; Using index for group-by
with temp_table as (
select first_name, last_name
from t1
diff --git a/mysql-test/main/fulltext_order_by.test b/mysql-test/main/fulltext_order_by.test
index 9fddf3b2fec..4c0333d0afb 100644
--- a/mysql-test/main/fulltext_order_by.test
+++ b/mysql-test/main/fulltext_order_by.test
@@ -36,6 +36,7 @@ SELECT IF(a=7,'match',IF(a=4,'match', 'no-match')), MATCH (message) AGAINST ('st
# for fulltext searches too
#
alter table t1 add key m (message);
+show create table t1;
explain SELECT message FROM t1 WHERE MATCH (message) AGAINST ('steve') ORDER BY message;
SELECT message FROM t1 WHERE MATCH (message) AGAINST ('steve') ORDER BY message desc;
diff --git a/mysql-test/main/information_schema_all_engines.result b/mysql-test/main/information_schema_all_engines.result
index 23a853e363c..db9bf156b8e 100644
--- a/mysql-test/main/information_schema_all_engines.result
+++ b/mysql-test/main/information_schema_all_engines.result
@@ -42,6 +42,7 @@ INNODB_TRX
KEYWORDS
KEY_CACHES
KEY_COLUMN_USAGE
+OPTIMIZER_COSTS
OPTIMIZER_TRACE
PARAMETERS
PARTITIONS
@@ -123,6 +124,7 @@ INNODB_TRX trx_id
KEYWORDS WORD
KEY_CACHES KEY_CACHE_NAME
KEY_COLUMN_USAGE CONSTRAINT_SCHEMA
+OPTIMIZER_COSTS ENGINE
OPTIMIZER_TRACE QUERY
PARAMETERS SPECIFIC_SCHEMA
PARTITIONS TABLE_SCHEMA
@@ -204,6 +206,7 @@ INNODB_TRX trx_id
KEYWORDS WORD
KEY_CACHES KEY_CACHE_NAME
KEY_COLUMN_USAGE CONSTRAINT_SCHEMA
+OPTIMIZER_COSTS ENGINE
OPTIMIZER_TRACE QUERY
PARAMETERS SPECIFIC_SCHEMA
PARTITIONS TABLE_SCHEMA
@@ -289,6 +292,7 @@ INNODB_TABLESPACES_ENCRYPTION information_schema.INNODB_TABLESPACES_ENCRYPTION 1
INNODB_TRX information_schema.INNODB_TRX 1
KEY_CACHES information_schema.KEY_CACHES 1
KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1
+OPTIMIZER_COSTS information_schema.OPTIMIZER_COSTS 1
OPTIMIZER_TRACE information_schema.OPTIMIZER_TRACE 1
PARAMETERS information_schema.PARAMETERS 1
PARTITIONS information_schema.PARTITIONS 1
@@ -359,6 +363,7 @@ Database: information_schema
| KEYWORDS |
| KEY_CACHES |
| KEY_COLUMN_USAGE |
+| OPTIMIZER_COSTS |
| OPTIMIZER_TRACE |
| PARAMETERS |
| PARTITIONS |
@@ -430,6 +435,7 @@ Database: INFORMATION_SCHEMA
| KEYWORDS |
| KEY_CACHES |
| KEY_COLUMN_USAGE |
+| OPTIMIZER_COSTS |
| OPTIMIZER_TRACE |
| PARAMETERS |
| PARTITIONS |
@@ -463,5 +469,5 @@ Wildcard: inf_rmation_schema
| information_schema |
SELECT table_schema, count(*) FROM information_schema.TABLES WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test', 'mysqltest') GROUP BY TABLE_SCHEMA;
table_schema count(*)
-information_schema 66
+information_schema 67
mysql 31
diff --git a/mysql-test/main/intersect.result b/mysql-test/main/intersect.result
index 425f6940a35..299737e794b 100644
--- a/mysql-test/main/intersect.result
+++ b/mysql-test/main/intersect.result
@@ -462,7 +462,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "256Kb",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -560,7 +561,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "256Kb",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
diff --git a/mysql-test/main/intersect_all.result b/mysql-test/main/intersect_all.result
index aecd5b1ac18..e47a8872211 100644
--- a/mysql-test/main/intersect_all.result
+++ b/mysql-test/main/intersect_all.result
@@ -493,7 +493,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "65",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -591,7 +592,8 @@ ANALYZE
"buffer_type": "flat",
"buffer_size": "65",
"join_type": "BNL",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
diff --git a/mysql-test/main/key.result b/mysql-test/main/key.result
index 762218f8580..42d57a35531 100644
--- a/mysql-test/main/key.result
+++ b/mysql-test/main/key.result
@@ -631,19 +631,19 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using temporary; Using filesort
SHOW STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 8.506592
+Last_query_cost 0.014749
EXPLAIN SELECT a, SUM( b ) FROM t1 USE INDEX( a ) GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using temporary; Using filesort
SHOW STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 8.506592
+Last_query_cost 0.014749
EXPLAIN SELECT a, SUM( b ) FROM t1 FORCE INDEX( a ) GROUP BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL a 5 NULL 6
SHOW STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 8.506592
+Last_query_cost 0.014749
DROP TABLE t1;
#
# MDEV-21480: Unique key using ref access though eq_ref access can be used
diff --git a/mysql-test/main/myisam.result b/mysql-test/main/myisam.result
index cb163bb29ff..aae3ea173b7 100644
--- a/mysql-test/main/myisam.result
+++ b/mysql-test/main/myisam.result
@@ -348,11 +348,11 @@ t1 1 c_2 2 a A 5 NULL NULL BTREE NO
explain select * from t1,t2 where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL a NULL NULL NULL 2
-1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
+1 SIMPLE t1 ref a a 4 test.t2.a 3
explain select * from t1,t2 force index(a) where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL a NULL NULL NULL 2
-1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
+1 SIMPLE t1 ref a a 4 test.t2.a 3
explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL a NULL NULL NULL 2
@@ -388,10 +388,10 @@ t1 1 c_2 2 a A 5 NULL NULL BTREE NO
explain select * from t1,t2 force index(c) where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 2
-1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join)
+1 SIMPLE t1 ref a a 4 test.t2.a 3
explain select * from t1 where a=0 or a=2;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where
+1 SIMPLE t1 range a a 4 NULL 5 Using index condition
explain select * from t1 force index (a) where a=0 or a=2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 4 NULL 5 Using index condition
@@ -640,7 +640,7 @@ create table t1 ( a tinytext, b char(1), index idx (a(1),b) );
insert into t1 values (null,''), (null,'');
explain select count(*) from t1 where a is null;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL idx NULL NULL NULL 2 Using where
+1 SIMPLE t1 ref idx idx 4 const 2 Using where
select count(*) from t1 where a is null;
count(*)
2
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 55a316aca55..cc62e406e26 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -720,11 +720,15 @@ The following specify which files/extra groups are read (specified before remain
max_connections*5 or max_connections + table_cache*2
(whichever is larger) number of file descriptors
(Automatically configured unless set explicitly)
- --optimizer-cache-hit-ratio=#
- Expected hit rate of the row and index cache in storage
- engines. The value should be an integer between 0 and 99,
- where 0 means cache is empty and 99 means that value is
- almost always in the cache.
+ --optimizer-disk-read-cost=#
+ Cost of reading a block of IO_SIZE (4096) from a disk (in
+ usec).
+ --optimizer-disk-read-ratio=#
+ Chance that we have to do a disk read to find a row or
+ index entry from the engine cache
+ (cache_misses/total_cache_requests). 0.0 means that
+ everything is cached and 1.0 means that nothing is
+ expected to be in the engine cache.
--optimizer-extra-pruning-depth=#
If the optimizer needs to enumerate join prefix of this
size or larger, then it will try agressively prune away
@@ -737,6 +741,8 @@ The following specify which files/extra groups are read (specified before remain
--optimizer-key-copy-cost=#
Cost of finding the next key in the engine and copying it
to the SQL layer.
+ --optimizer-key-lookup-cost=#
+ Cost for finding a key based on a key value
--optimizer-key-next-find-cost=#
Cost of finding the next key and rowid when using
filters.
@@ -753,6 +759,14 @@ The following specify which files/extra groups are read (specified before remain
--optimizer-row-copy-cost=#
Cost of copying a row from the engine or the join cache
to the SQL layer.
+ --optimizer-row-lookup-cost=#
+ Cost of finding a row based on a rowid or a clustered
+ key.
+ --optimizer-row-next-find-cost=#
+ Cost of finding the next row when scanning the table.
+ --optimizer-scan-setup-cost=#
+ Extra cost added to TABLE and INDEX scans to get
+ optimizer to prefer index lookups.
--optimizer-search-depth=#
Maximum depth of search performed by the query optimizer.
Values larger than the number of relations in a query
@@ -807,6 +821,8 @@ The following specify which files/extra groups are read (specified before remain
record samples
--optimizer-where-cost=#
Cost of checking the row against the WHERE clause.
+ Increasing this will have the optimizer to prefer plans
+ with less row combinations.
--performance-schema
Enable the performance schema.
--performance-schema-accounts-size=#
@@ -1719,22 +1735,27 @@ old-alter-table DEFAULT
old-mode UTF8_IS_UTF8MB3
old-passwords FALSE
old-style-user-limits FALSE
-optimizer-cache-hit-ratio 50
+optimizer-disk-read-cost 0.01024
+optimizer-disk-read-ratio 0.02
optimizer-extra-pruning-depth 8
-optimizer-index-block-copy-cost 0.2
-optimizer-key-compare-cost 0.05
-optimizer-key-copy-cost 0.025
-optimizer-key-next-find-cost 0.0125
+optimizer-index-block-copy-cost 3.56e-05
+optimizer-key-compare-cost 1.1361e-05
+optimizer-key-copy-cost 1.5685e-05
+optimizer-key-lookup-cost 0.000435777
+optimizer-key-next-find-cost 8.2347e-05
optimizer-max-sel-arg-weight 32000
optimizer-prune-level 2
-optimizer-row-copy-cost 0.05
+optimizer-row-copy-cost 6.0866e-05
+optimizer-row-lookup-cost 0.000130839
+optimizer-row-next-find-cost 4.5916e-05
+optimizer-scan-setup-cost 0.01
optimizer-search-depth 62
optimizer-selectivity-sampling-limit 100
optimizer-switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on
optimizer-trace
optimizer-trace-max-mem-size 1048576
optimizer-use-condition-selectivity 4
-optimizer-where-cost 0.2
+optimizer-where-cost 3.2e-05
performance-schema FALSE
performance-schema-accounts-size -1
performance-schema-consumer-events-stages-current FALSE
diff --git a/mysql-test/main/opt_trace_security.result b/mysql-test/main/opt_trace_security.result
index 5334ed8b2c4..f440aa381c3 100644
--- a/mysql-test/main/opt_trace_security.result
+++ b/mysql-test/main/opt_trace_security.result
@@ -80,8 +80,8 @@ select * from db1.t1 {
"table": "t1",
"table_scan": {
"rows": 3,
- "read_cost": 1.002563477,
- "read_and_compare_cost": 1.752563477
+ "read_cost": 0.010373215,
+ "read_and_compare_cost": 0.010469215
}
}
]
@@ -101,18 +101,18 @@ select * from db1.t1 {
{
"access_type": "scan",
"rows": 3,
- "rows_after_scan": 3,
"rows_after_filter": 3,
- "cost": 1.752563477,
+ "rows_out": 3,
+ "cost": 0.010469215,
"index_only": false,
"chosen": true
}
],
"chosen_access_method": {
"type": "scan",
- "records_read": 3,
- "records_out": 3,
- "cost": 1.752563477,
+ "rows_read": 3,
+ "rows_out": 3,
+ "cost": 0.010469215,
"uses_join_buffering": false
}
}
@@ -123,14 +123,14 @@ select * from db1.t1 {
"plan_prefix": [],
"table": "t1",
"rows_for_plan": 3,
- "cost_for_plan": 1.752563477
+ "cost_for_plan": 0.010469215
}
]
},
{
"best_join_order": ["t1"],
"rows": 3,
- "cost": 1.752563477
+ "cost": 0.010469215
},
{
"attaching_conditions_to_tables": {
@@ -219,8 +219,8 @@ select * from db1.v1 {
"table": "t1",
"table_scan": {
"rows": 3,
- "read_cost": 1.002563477,
- "read_and_compare_cost": 1.752563477
+ "read_cost": 0.010373215,
+ "read_and_compare_cost": 0.010469215
}
}
]
@@ -240,18 +240,18 @@ select * from db1.v1 {
{
"access_type": "scan",
"rows": 3,
- "rows_after_scan": 3,
"rows_after_filter": 3,
- "cost": 1.752563477,
+ "rows_out": 3,
+ "cost": 0.010469215,
"index_only": false,
"chosen": true
}
],
"chosen_access_method": {
"type": "scan",
- "records_read": 3,
- "records_out": 3,
- "cost": 1.752563477,
+ "rows_read": 3,
+ "rows_out": 3,
+ "cost": 0.010469215,
"uses_join_buffering": false
}
}
@@ -262,14 +262,14 @@ select * from db1.v1 {
"plan_prefix": [],
"table": "t1",
"rows_for_plan": 3,
- "cost_for_plan": 1.752563477
+ "cost_for_plan": 0.010469215
}
]
},
{
"best_join_order": ["t1"],
"rows": 3,
- "cost": 1.752563477
+ "cost": 0.010469215
},
{
"attaching_conditions_to_tables": {
diff --git a/mysql-test/main/opt_trace_ucs2.result b/mysql-test/main/opt_trace_ucs2.result
index 1ced3cd6dc2..5ae8a5fd6b8 100644
--- a/mysql-test/main/opt_trace_ucs2.result
+++ b/mysql-test/main/opt_trace_ucs2.result
@@ -42,7 +42,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.analyzing_range_alternatives'))
"using_mrr": false,
"index_only": false,
"rows": 2,
- "cost": 2.022733708,
+ "cost": 0.003717837,
"chosen": true
}
],
diff --git a/mysql-test/main/rowid_filter_innodb.result b/mysql-test/main/rowid_filter_innodb.result
index fb7f853dd64..c1f47d40ce1 100644
--- a/mysql-test/main/rowid_filter_innodb.result
+++ b/mysql-test/main/rowid_filter_innodb.result
@@ -244,7 +244,7 @@ EXPLAIN
"key_length": "4",
"used_key_parts": ["l_shipDATE"],
"rows": 510,
- "filtered": 100,
+ "filtered": 10.07493782,
"index_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'",
"attached_condition": "lineitem.l_quantity > 45"
}
@@ -256,7 +256,7 @@ set statement optimizer_switch='rowid_filter=off' for ANALYZE SELECT l_orderkey,
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
l_quantity > 45;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE lineitem range i_l_shipdate,i_l_quantity i_l_shipdate 4 NULL 510 510.00 100.00 11.76 Using index condition; Using where
+1 SIMPLE lineitem range i_l_shipdate,i_l_quantity i_l_shipdate 4 NULL 510 510.00 10.07 11.76 Using index condition; Using where
set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT l_orderkey, l_linenumber, l_shipdate, l_quantity FROM lineitem
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
l_quantity > 45;
@@ -283,7 +283,7 @@ ANALYZE
"r_rows": 510,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 100,
+ "filtered": 10.07493782,
"r_filtered": 11.76470588,
"index_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'",
"attached_condition": "lineitem.l_quantity > 45"
@@ -361,8 +361,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using where; Using index
-1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where
+1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 Using where; Using index
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
@@ -374,35 +374,35 @@ EXPLAIN
"nested_loop": [
{
"table": {
- "table_name": "lineitem",
+ "table_name": "orders",
"access_type": "range",
- "possible_keys": [
- "PRIMARY",
- "i_l_shipdate",
- "i_l_orderkey",
- "i_l_orderkey_quantity"
- ],
- "key": "i_l_shipdate",
- "key_length": "4",
- "used_key_parts": ["l_shipDATE"],
- "rows": 98,
+ "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "key": "i_o_totalprice",
+ "key_length": "9",
+ "used_key_parts": ["o_totalprice"],
+ "rows": 71,
"filtered": 100,
- "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'",
+ "attached_condition": "orders.o_totalprice between 200000 and 230000",
"using_index": true
}
},
{
"table": {
- "table_name": "orders",
- "access_type": "eq_ref",
- "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "table_name": "lineitem",
+ "access_type": "ref",
+ "possible_keys": [
+ "PRIMARY",
+ "i_l_shipdate",
+ "i_l_orderkey",
+ "i_l_orderkey_quantity"
+ ],
"key": "PRIMARY",
"key_length": "4",
- "used_key_parts": ["o_orderkey"],
- "ref": ["dbt3_s001.lineitem.l_orderkey"],
- "rows": 1,
- "filtered": 4.733333111,
- "attached_condition": "orders.o_totalprice between 200000 and 230000"
+ "used_key_parts": ["l_orderkey"],
+ "ref": ["dbt3_s001.orders.o_orderkey"],
+ "rows": 4,
+ "filtered": 1.633319736,
+ "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'"
}
}
]
@@ -413,8 +413,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using where; Using index
-1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 4.73 11.22 Using where
+1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 1.63 2.31 Using where
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
@@ -431,45 +431,45 @@ ANALYZE
"nested_loop": [
{
"table": {
- "table_name": "lineitem",
+ "table_name": "orders",
"access_type": "range",
- "possible_keys": [
- "PRIMARY",
- "i_l_shipdate",
- "i_l_orderkey",
- "i_l_orderkey_quantity"
- ],
- "key": "i_l_shipdate",
- "key_length": "4",
- "used_key_parts": ["l_shipDATE"],
+ "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "key": "i_o_totalprice",
+ "key_length": "9",
+ "used_key_parts": ["o_totalprice"],
"r_loops": 1,
- "rows": 98,
- "r_rows": 98,
+ "rows": 71,
+ "r_rows": 71,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100,
- "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'",
+ "attached_condition": "orders.o_totalprice between 200000 and 230000",
"using_index": true
}
},
{
"table": {
- "table_name": "orders",
- "access_type": "eq_ref",
- "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "table_name": "lineitem",
+ "access_type": "ref",
+ "possible_keys": [
+ "PRIMARY",
+ "i_l_shipdate",
+ "i_l_orderkey",
+ "i_l_orderkey_quantity"
+ ],
"key": "PRIMARY",
"key_length": "4",
- "used_key_parts": ["o_orderkey"],
- "ref": ["dbt3_s001.lineitem.l_orderkey"],
- "r_loops": 98,
- "rows": 1,
- "r_rows": 1,
+ "used_key_parts": ["l_orderkey"],
+ "ref": ["dbt3_s001.orders.o_orderkey"],
+ "r_loops": 71,
+ "rows": 4,
+ "r_rows": 6.704225352,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 4.733333111,
- "r_filtered": 11.2244898,
- "attached_condition": "orders.o_totalprice between 200000 and 230000"
+ "filtered": 1.633319736,
+ "r_filtered": 2.31092437,
+ "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'"
}
}
]
@@ -496,8 +496,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using where; Using index
-1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where
+1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 Using where; Using index
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where
set statement optimizer_switch='rowid_filter=off' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
@@ -509,35 +509,35 @@ EXPLAIN
"nested_loop": [
{
"table": {
- "table_name": "lineitem",
+ "table_name": "orders",
"access_type": "range",
- "possible_keys": [
- "PRIMARY",
- "i_l_shipdate",
- "i_l_orderkey",
- "i_l_orderkey_quantity"
- ],
- "key": "i_l_shipdate",
- "key_length": "4",
- "used_key_parts": ["l_shipDATE"],
- "rows": 98,
+ "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "key": "i_o_totalprice",
+ "key_length": "9",
+ "used_key_parts": ["o_totalprice"],
+ "rows": 71,
"filtered": 100,
- "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'",
+ "attached_condition": "orders.o_totalprice between 200000 and 230000",
"using_index": true
}
},
{
"table": {
- "table_name": "orders",
- "access_type": "eq_ref",
- "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "table_name": "lineitem",
+ "access_type": "ref",
+ "possible_keys": [
+ "PRIMARY",
+ "i_l_shipdate",
+ "i_l_orderkey",
+ "i_l_orderkey_quantity"
+ ],
"key": "PRIMARY",
"key_length": "4",
- "used_key_parts": ["o_orderkey"],
- "ref": ["dbt3_s001.lineitem.l_orderkey"],
- "rows": 1,
- "filtered": 4.733333111,
- "attached_condition": "orders.o_totalprice between 200000 and 230000"
+ "used_key_parts": ["l_orderkey"],
+ "ref": ["dbt3_s001.orders.o_orderkey"],
+ "rows": 4,
+ "filtered": 1.633319736,
+ "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'"
}
}
]
@@ -548,8 +548,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using where; Using index
-1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 4.73 11.22 Using where
+1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 1.63 2.31 Using where
set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND
@@ -566,45 +566,45 @@ ANALYZE
"nested_loop": [
{
"table": {
- "table_name": "lineitem",
+ "table_name": "orders",
"access_type": "range",
- "possible_keys": [
- "PRIMARY",
- "i_l_shipdate",
- "i_l_orderkey",
- "i_l_orderkey_quantity"
- ],
- "key": "i_l_shipdate",
- "key_length": "4",
- "used_key_parts": ["l_shipDATE"],
+ "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "key": "i_o_totalprice",
+ "key_length": "9",
+ "used_key_parts": ["o_totalprice"],
"r_loops": 1,
- "rows": 98,
- "r_rows": 98,
+ "rows": 71,
+ "r_rows": 71,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100,
- "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'",
+ "attached_condition": "orders.o_totalprice between 200000 and 230000",
"using_index": true
}
},
{
"table": {
- "table_name": "orders",
- "access_type": "eq_ref",
- "possible_keys": ["PRIMARY", "i_o_totalprice"],
+ "table_name": "lineitem",
+ "access_type": "ref",
+ "possible_keys": [
+ "PRIMARY",
+ "i_l_shipdate",
+ "i_l_orderkey",
+ "i_l_orderkey_quantity"
+ ],
"key": "PRIMARY",
"key_length": "4",
- "used_key_parts": ["o_orderkey"],
- "ref": ["dbt3_s001.lineitem.l_orderkey"],
- "r_loops": 98,
- "rows": 1,
- "r_rows": 1,
+ "used_key_parts": ["l_orderkey"],
+ "ref": ["dbt3_s001.orders.o_orderkey"],
+ "r_loops": 71,
+ "rows": 4,
+ "r_rows": 6.704225352,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 4.733333111,
- "r_filtered": 11.2244898,
- "attached_condition": "orders.o_totalprice between 200000 and 230000"
+ "filtered": 1.633319736,
+ "r_filtered": 2.31092437,
+ "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'"
}
}
]
@@ -633,7 +633,7 @@ l_quantity > 45 AND
o_totalprice between 180000 and 230000;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 144 Using where; Using index
-1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (8%) Using where; Using rowid filter
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
@@ -669,20 +669,12 @@ EXPLAIN
"i_l_orderkey_quantity",
"i_l_quantity"
],
- "key": "i_l_orderkey",
+ "key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
- "rowid_filter": {
- "range": {
- "key": "i_l_shipdate",
- "used_key_parts": ["l_shipDATE"]
- },
- "rows": 510,
- "selectivity_pct": 8.492922565
- },
"rows": 4,
- "filtered": 0.855656624,
+ "filtered": 0.856362581,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45"
}
}
@@ -696,7 +688,7 @@ l_quantity > 45 AND
o_totalprice between 180000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 144 144.00 100.00 100.00 Using where; Using index
-1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (8%) 0.54 (8%) 0.86 20.51 Using where; Using rowid filter
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.62 0.86 1.68 Using where
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
@@ -742,29 +734,17 @@ ANALYZE
"i_l_orderkey_quantity",
"i_l_quantity"
],
- "key": "i_l_orderkey",
+ "key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
- "rowid_filter": {
- "range": {
- "key": "i_l_shipdate",
- "used_key_parts": ["l_shipDATE"]
- },
- "rows": 510,
- "selectivity_pct": 8.492922565,
- "r_rows": 510,
- "r_selectivity_pct": 8.176100629,
- "r_buffer_size": "REPLACED",
- "r_filling_time_ms": "REPLACED"
- },
"r_loops": 144,
"rows": 4,
- "r_rows": 0.541666667,
+ "r_rows": 6.625,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 0.855656624,
- "r_filtered": 20.51282051,
+ "filtered": 0.856362581,
+ "r_filtered": 1.677148847,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45"
}
}
@@ -841,7 +821,7 @@ EXPLAIN
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
"rows": 4,
- "filtered": 0.855656624,
+ "filtered": 0.856362581,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45"
}
}
@@ -910,7 +890,7 @@ ANALYZE
"r_rows": 6.625,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 0.855656624,
+ "filtered": 0.856362581,
"r_filtered": 1.677148847,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45"
}
@@ -993,7 +973,7 @@ EXPLAIN
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
"rows": 4,
- "filtered": 8.492922783,
+ "filtered": 8.499929428,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'"
}
}
@@ -1006,7 +986,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index
-1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.49 7.77 Using where
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.50 7.77 Using where
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
@@ -1059,7 +1039,7 @@ ANALYZE
"r_rows": 6.704225352,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 8.492922783,
+ "filtered": 8.499929428,
"r_filtered": 7.773109244,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'"
}
@@ -1154,7 +1134,7 @@ EXPLAIN
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
"rows": 4,
- "filtered": 8.492922783,
+ "filtered": 8.499929428,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'"
}
}
@@ -1167,7 +1147,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
o_totalprice between 200000 and 230000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index
-1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.49 7.77 Using where
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.50 7.77 Using where
set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice
FROM orders JOIN lineitem ON o_orderkey=l_orderkey
WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND
@@ -1220,7 +1200,7 @@ ANALYZE
"r_rows": 6.704225352,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 8.492922783,
+ "filtered": 8.499929428,
"r_filtered": 7.773109244,
"attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'"
}
@@ -1312,7 +1292,7 @@ EXPLAIN
"key_length": "4",
"used_key_parts": ["l_receiptDATE"],
"rows": 18,
- "filtered": 100,
+ "filtered": 0.566194832,
"index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'",
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'"
}
@@ -1341,7 +1321,7 @@ l_shipdate BETWEEN '1996-10-01' AND '1996-10-10' AND
l_receiptdate BETWEEN '1996-10-05' AND '1996-10-10' AND
o_totalprice BETWEEN 200000 AND 250000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 100.00 38.89 Using index condition; Using where
+1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 0.57 38.89 Using index condition; Using where
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 5.67 14.29 Using where
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT l_shipdate, l_receiptdate, o_totalprice
FROM orders, lineitem
@@ -1378,7 +1358,7 @@ ANALYZE
"r_rows": 18,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 100,
+ "filtered": 0.566194832,
"r_filtered": 38.88888889,
"index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'",
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'"
@@ -1449,7 +1429,7 @@ EXPLAIN
"key_length": "4",
"used_key_parts": ["l_receiptDATE"],
"rows": 18,
- "filtered": 100,
+ "filtered": 0.566194832,
"index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'",
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'"
}
@@ -1478,7 +1458,7 @@ l_shipdate BETWEEN '1996-10-01' AND '1996-10-10' AND
l_receiptdate BETWEEN '1996-10-05' AND '1996-10-10' AND
o_totalprice BETWEEN 200000 AND 250000;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 100.00 38.89 Using index condition; Using where
+1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 0.57 38.89 Using index condition; Using where
1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 5.67 14.29 Using where
set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT l_shipdate, l_receiptdate, o_totalprice
FROM orders, lineitem
@@ -1515,7 +1495,7 @@ ANALYZE
"r_rows": 18,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 100,
+ "filtered": 0.566194832,
"r_filtered": 38.88888889,
"index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'",
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'"
@@ -1566,7 +1546,7 @@ o_totalprice BETWEEN 200000 AND 220000 AND
l_shipdate BETWEEN '1996-10-01' AND '1996-12-01';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 Using index condition; Using where
-1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) Using where; Using rowid filter
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate
FROM orders, lineitem
WHERE o_orderkey=l_orderkey AND
@@ -1587,7 +1567,7 @@ EXPLAIN
"key_length": "9",
"used_key_parts": ["o_totaldiscount"],
"rows": 41,
- "filtered": 100,
+ "filtered": 3.333333254,
"index_condition": "orders.o_totaldiscount between 18000 and 20000",
"attached_condition": "orders.o_totalprice between 200000 and 220000"
}
@@ -1602,20 +1582,12 @@ EXPLAIN
"i_l_orderkey",
"i_l_orderkey_quantity"
],
- "key": "i_l_orderkey",
+ "key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
- "rowid_filter": {
- "range": {
- "key": "i_l_shipdate",
- "used_key_parts": ["l_shipDATE"]
- },
- "rows": 183,
- "selectivity_pct": 3.04746045
- },
"rows": 4,
- "filtered": 3.047460556,
+ "filtered": 3.04997468,
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'"
}
}
@@ -1629,8 +1601,8 @@ o_totaldiscount BETWEEN 18000 AND 20000 AND
o_totalprice BETWEEN 200000 AND 220000 AND
l_shipdate BETWEEN '1996-10-01' AND '1996-12-01';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 100.00 2.44 Using index condition; Using where
-1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) 4.00 (66%) 3.05 100.00 Using where; Using rowid filter
+1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 3.33 2.44 Using index condition; Using where
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.00 3.05 66.67 Using where
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate
FROM orders, lineitem
WHERE o_orderkey=l_orderkey AND
@@ -1660,7 +1632,7 @@ ANALYZE
"r_rows": 41,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 100,
+ "filtered": 3.333333254,
"r_filtered": 2.43902439,
"index_condition": "orders.o_totaldiscount between 18000 and 20000",
"attached_condition": "orders.o_totalprice between 200000 and 220000"
@@ -1676,29 +1648,17 @@ ANALYZE
"i_l_orderkey",
"i_l_orderkey_quantity"
],
- "key": "i_l_orderkey",
+ "key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
- "rowid_filter": {
- "range": {
- "key": "i_l_shipdate",
- "used_key_parts": ["l_shipDATE"]
- },
- "rows": 183,
- "selectivity_pct": 3.04746045,
- "r_rows": 183,
- "r_selectivity_pct": 66.66666667,
- "r_buffer_size": "REPLACED",
- "r_filling_time_ms": "REPLACED"
- },
"r_loops": 1,
"rows": 4,
- "r_rows": 4,
+ "r_rows": 6,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 3.047460556,
- "r_filtered": 100,
+ "filtered": 3.04997468,
+ "r_filtered": 66.66666667,
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'"
}
}
@@ -1745,7 +1705,7 @@ EXPLAIN
"key_length": "9",
"used_key_parts": ["o_totaldiscount"],
"rows": 41,
- "filtered": 100,
+ "filtered": 3.333333254,
"index_condition": "orders.o_totaldiscount between 18000 and 20000",
"attached_condition": "orders.o_totalprice between 200000 and 220000"
}
@@ -1765,7 +1725,7 @@ EXPLAIN
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
"rows": 4,
- "filtered": 3.047460556,
+ "filtered": 3.04997468,
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'"
}
}
@@ -1779,7 +1739,7 @@ o_totaldiscount BETWEEN 18000 AND 20000 AND
o_totalprice BETWEEN 200000 AND 220000 AND
l_shipdate BETWEEN '1996-10-01' AND '1996-12-01';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 100.00 2.44 Using index condition; Using where
+1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 3.33 2.44 Using index condition; Using where
1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.00 3.05 66.67 Using where
set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate
FROM orders, lineitem
@@ -1810,7 +1770,7 @@ ANALYZE
"r_rows": 41,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 100,
+ "filtered": 3.333333254,
"r_filtered": 2.43902439,
"index_condition": "orders.o_totaldiscount between 18000 and 20000",
"attached_condition": "orders.o_totalprice between 200000 and 220000"
@@ -1835,7 +1795,7 @@ ANALYZE
"r_rows": 6,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 3.047460556,
+ "filtered": 3.04997468,
"r_filtered": 66.66666667,
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'"
}
@@ -1865,7 +1825,7 @@ o_totalprice BETWEEN 200000 AND 220000 AND
l_shipdate BETWEEN '1996-10-01' AND '1996-12-01';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 Using index condition; Using where
-1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) Using where; Using rowid filter
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where
set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate
FROM v1, lineitem
WHERE o_orderkey=l_orderkey AND
@@ -1906,18 +1866,10 @@ EXPLAIN
"i_l_orderkey",
"i_l_orderkey_quantity"
],
- "key": "i_l_orderkey",
+ "key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
- "rowid_filter": {
- "range": {
- "key": "i_l_shipdate",
- "used_key_parts": ["l_shipDATE"]
- },
- "rows": 183,
- "selectivity_pct": 3.04746045
- },
"rows": 4,
"filtered": "REPLACED",
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'"
@@ -1934,7 +1886,7 @@ o_totalprice BETWEEN 200000 AND 220000 AND
l_shipdate BETWEEN '1996-10-01' AND '1996-12-01';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 # 2.44 Using index condition; Using where
-1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) 4.00 (66%) # 100.00 Using where; Using rowid filter
+1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.00 # 66.67 Using where
set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate
FROM v1, lineitem
WHERE o_orderkey=l_orderkey AND
@@ -1985,29 +1937,17 @@ ANALYZE
"i_l_orderkey",
"i_l_orderkey_quantity"
],
- "key": "i_l_orderkey",
+ "key": "PRIMARY",
"key_length": "4",
"used_key_parts": ["l_orderkey"],
"ref": ["dbt3_s001.orders.o_orderkey"],
- "rowid_filter": {
- "range": {
- "key": "i_l_shipdate",
- "used_key_parts": ["l_shipDATE"]
- },
- "rows": 183,
- "selectivity_pct": 3.04746045,
- "r_rows": 183,
- "r_selectivity_pct": 66.66666667,
- "r_buffer_size": "REPLACED",
- "r_filling_time_ms": "REPLACED"
- },
"r_loops": 1,
"rows": 4,
- "r_rows": 4,
+ "r_rows": 6,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": "REPLACED",
- "r_filtered": 100,
+ "r_filtered": 66.66666667,
"attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'"
}
}
@@ -2246,7 +2186,7 @@ EXPLAIN EXTENDED
SELECT * FROM t1 HAVING (7, 9) IN (SELECT t2.i1, t2.i2 FROM t2 WHERE t2.i1 = 3);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL NULL Impossible HAVING
-2 SUBQUERY t2 ref i1,i2 i1 5 const 1 100.00 Using index condition; Using where
+2 SUBQUERY t2 ref i1,i2 i1 5 const 1 10.00 Using index condition; Using where
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` having 0
DROP TABLE t1,t2;
@@ -2284,7 +2224,7 @@ EXPLAIN EXTENDED SELECT * FROM t1 INNER JOIN t2 ON ( pk1 <> pk2 AND pk1 = a2 )
WHERE b1 <= ( SELECT MAX(b2) FROM t2 WHERE pk2 <= 1 );
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 101 100.00 Using where
-1 PRIMARY t1 eq_ref|filter PRIMARY,b1 PRIMARY|b1 4|4 test.t2.a2 1 (87%) 87.00 Using where; Using rowid filter
+1 PRIMARY t1 eq_ref PRIMARY,b1 PRIMARY 4 test.t2.a2 1 87.00 Using where
2 SUBQUERY t2 range PRIMARY PRIMARY 4 NULL 1 100.00 Using index condition
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`pk1` AS `pk1`,`test`.`t1`.`a1` AS `a1`,`test`.`t1`.`b1` AS `b1`,`test`.`t2`.`pk2` AS `pk2`,`test`.`t2`.`a2` AS `a2`,`test`.`t2`.`b2` AS `b2` from `test`.`t1` join `test`.`t2` where `test`.`t1`.`pk1` = `test`.`t2`.`a2` and `test`.`t1`.`b1` <= (/* select#2 */ select max(`test`.`t2`.`b2`) from `test`.`t2` where `test`.`t2`.`pk2` <= 1) and `test`.`t2`.`a2` <> `test`.`t2`.`pk2`
@@ -2313,14 +2253,6 @@ EXPLAIN
"key_length": "4",
"used_key_parts": ["pk1"],
"ref": ["test.t2.a2"],
- "rowid_filter": {
- "range": {
- "key": "b1",
- "used_key_parts": ["b1"]
- },
- "rows": 87,
- "selectivity_pct": 87
- },
"rows": 1,
"filtered": 87,
"attached_condition": "t1.b1 <= (subquery#2)"
@@ -2387,7 +2319,7 @@ explain
select * from t1
where el_index like '10%' and (el_index_60 like '10%' or el_index_60 like '20%');
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 range el_index,el_index_60 el_index 62 NULL 1000 Using where
+1 SIMPLE t1 ALL el_index,el_index_60 NULL NULL NULL 10000 Using where
drop table t10, t11, t1;
#
# MDEV-22160: SIGSEGV in st_join_table::save_explain_data on SELECT
@@ -2442,8 +2374,8 @@ pk a b c
7 5 k 5
explain SELECT * FROM t1 JOIN t2 WHERE a = c AND pk BETWEEN 4 AND 7 AND a BETWEEN 2 AND 12 AND b != 'foo';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t2 ALL NULL NULL NULL NULL 6 Using where
-1 SIMPLE t1 ref|filter PRIMARY,a,a_2 a|PRIMARY 5|4 test.t2.c 3 (4%) Using where; Using rowid filter
+1 SIMPLE t1 range|filter PRIMARY,a,a_2 PRIMARY|a 4|5 NULL 4 (11%) Using index condition; Using where; Using rowid filter
+1 SIMPLE t2 ALL NULL NULL NULL NULL 6 Using where; Using join buffer (flat, BNL join)
SET optimizer_switch='rowid_filter=off';
SELECT * FROM t1 JOIN t2 WHERE a = c AND pk BETWEEN 4 AND 7 AND a BETWEEN 2 AND 12 AND b != 'foo';
pk a b c
@@ -2530,7 +2462,7 @@ EXPLAIN
]
},
"rows": 1,
- "filtered": 100,
+ "filtered": 1.587301612,
"attached_condition": "t1.f1 is null and t1.f2 is null and (t1.f2 between 'a' and 'z' or t1.f1 = 'a')"
}
}
@@ -2565,7 +2497,7 @@ EXPLAIN
]
},
"rows": 1,
- "filtered": 100,
+ "filtered": 1.587301612,
"attached_condition": "t1.f1 is null and t1.f2 is null and (t1.f2 between 'a' and 'z' or t1.f1 = 'a')"
}
}
@@ -2592,7 +2524,7 @@ id y x
1 2 1
explain extended select * from t1 join t2 on t1.id = t2.x where t2.y = 2 and t1.id = 1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 const PRIMARY PRIMARY 4 const 1 # Using index
+1 SIMPLE t1 const PRIMARY PRIMARY 4 const 1 #
1 SIMPLE t2 index_merge x,y y,x 5,5 NULL 1 # Using intersect(y,x); Using where; Using index
Warnings:
Note 1003 select 1 AS `id`,`test`.`t2`.`y` AS `y`,`test`.`t2`.`x` AS `x` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`y` = 2 and `test`.`t2`.`x` = 1
@@ -2614,7 +2546,7 @@ count(*)
6
explain extended select count(*) from t1 where a in (22,83,11) and b=2;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 range b,a a 5 NULL 33 100.00 Using index condition; Using where
+1 SIMPLE t1 range b,a a 5 NULL 33 5.90 Using index condition; Using where
Warnings:
Note 1003 select count(0) AS `count(*)` from `test`.`t1` where `test`.`t1`.`b` = 2 and `test`.`t1`.`a` in (22,83,11)
select * from t1 where a in (22,83,11) and b=2;
@@ -2717,11 +2649,11 @@ t1.id2 = t1.id);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t3 ALL NULL NULL NULL NULL 1 100.00 Using where
2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 10 100.00 Using where
-3 MATERIALIZED t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index
-3 MATERIALIZED bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join)
+3 DEPENDENT SUBQUERY t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index
+3 DEPENDENT SUBQUERY bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t3.id' of SELECT #2 was resolved in SELECT #1
-Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,`test`.`t3`.`id` in ( <materialize> (/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' ), <primary_index_lookup>(`test`.`t3`.`id` in <temporary table> on distinct_key where `test`.`t3`.`id` = `<subquery3>`.`id`)))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1)))
+Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,<exists>(/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' and <cache>(`test`.`t3`.`id`) = `test`.`bt1`.`id`))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1)))
SELECT 1 FROM t3
WHERE EXISTS ( SELECT 1 FROM t1
WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
@@ -2740,11 +2672,11 @@ t1.id2 = t1.id);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t3 ALL NULL NULL NULL NULL 1 100.00 Using where
2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 10 100.00 Using where
-3 MATERIALIZED t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index
-3 MATERIALIZED bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join)
+3 DEPENDENT SUBQUERY t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index
+3 DEPENDENT SUBQUERY bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t3.id' of SELECT #2 was resolved in SELECT #1
-Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,`test`.`t3`.`id` in ( <materialize> (/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`ch` = `test`.`t2`.`ch2` and `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' ), <primary_index_lookup>(`test`.`t3`.`id` in <temporary table> on distinct_key where `test`.`t3`.`id` = `<subquery3>`.`id`)))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1)))
+Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,<exists>(/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`ch` = `test`.`t2`.`ch2` and `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' and <cache>(`test`.`t3`.`id`) = `test`.`bt1`.`id`))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1)))
SELECT 1 FROM t3
WHERE EXISTS ( SELECT 1 FROM t1
WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
@@ -2781,7 +2713,7 @@ test.t1 analyze status OK
explain extended select count(0) from t1
where id=15066 and (match s against ('+"fttest"' in boolean mode));
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 fulltext id,s s 0 1 1.64 Using where
+1 SIMPLE t1 ref id,s id 5 const 1 100.00 Using where
Warnings:
Note 1003 select count(0) AS `count(0)` from `test`.`t1` where `test`.`t1`.`id` = 15066 and (match `test`.`t1`.`s` against ('+"fttest"' in boolean mode))
select count(0) from t1
@@ -2899,7 +2831,7 @@ WHERE 1 = 1 AND domain = 'www.mailhost.i-dev.fr' AND
timestamp >= DATE_ADD(CURRENT_TIMESTAMP, INTERVAL -1 MONTH)
ORDER BY timestamp DESC;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 range ixEventWhoisDomainDomain,ixEventWhoisDomainTimestamp ixEventWhoisDomainTimestamp 4 NULL 1 100.00 Using where
+1 SIMPLE t1 range ixEventWhoisDomainDomain,ixEventWhoisDomainTimestamp ixEventWhoisDomainTimestamp 4 NULL 1 28.57 Using where
Warnings:
Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`domain` AS `domain`,`test`.`t1`.`registrant_name` AS `registrant_name`,`test`.`t1`.`registrant_organization` AS `registrant_organization`,`test`.`t1`.`registrant_street1` AS `registrant_street1`,`test`.`t1`.`registrant_street2` AS `registrant_street2`,`test`.`t1`.`registrant_street3` AS `registrant_street3`,`test`.`t1`.`registrant_street4` AS `registrant_street4`,`test`.`t1`.`registrant_street5` AS `registrant_street5`,`test`.`t1`.`registrant_city` AS `registrant_city`,`test`.`t1`.`registrant_postal_code` AS `registrant_postal_code`,`test`.`t1`.`registrant_country` AS `registrant_country`,`test`.`t1`.`registrant_email` AS `registrant_email`,`test`.`t1`.`registrant_telephone` AS `registrant_telephone`,`test`.`t1`.`administrative_name` AS `administrative_name`,`test`.`t1`.`administrative_organization` AS `administrative_organization`,`test`.`t1`.`administrative_street1` AS `administrative_street1`,`test`.`t1`.`administrative_street2` AS `administrative_street2`,`test`.`t1`.`administrative_street3` AS `administrative_street3`,`test`.`t1`.`administrative_street4` AS `administrative_street4`,`test`.`t1`.`administrative_street5` AS `administrative_street5`,`test`.`t1`.`administrative_city` AS `administrative_city`,`test`.`t1`.`administrative_postal_code` AS `administrative_postal_code`,`test`.`t1`.`administrative_country` AS `administrative_country`,`test`.`t1`.`administrative_email` AS `administrative_email`,`test`.`t1`.`administrative_telephone` AS `administrative_telephone`,`test`.`t1`.`technical_name` AS `technical_name`,`test`.`t1`.`technical_organization` AS `technical_organization`,`test`.`t1`.`technical_street1` AS `technical_street1`,`test`.`t1`.`technical_street2` AS `technical_street2`,`test`.`t1`.`technical_street3` AS `technical_street3`,`test`.`t1`.`technical_street4` AS `technical_street4`,`test`.`t1`.`technical_street5` AS `technical_street5`,`test`.`t1`.`technical_city` AS `technical_city`,`test`.`t1`.`technical_postal_code` AS `technical_postal_code`,`test`.`t1`.`technical_country` AS `technical_country`,`test`.`t1`.`technical_email` AS `technical_email`,`test`.`t1`.`technical_telephone` AS `technical_telephone`,`test`.`t1`.`json` AS `json`,`test`.`t1`.`timestamp` AS `timestamp` from `test`.`t1` where `test`.`t1`.`domain` = 'www.mailhost.i-dev.fr' and `test`.`t1`.`timestamp` >= <cache>(current_timestamp() + interval -1 month) order by `test`.`t1`.`timestamp` desc
SET optimizer_switch=@save_optimizer_switch;
@@ -2946,7 +2878,7 @@ SELECT * FROM t1
WHERE (a BETWEEN 9 AND 10 OR a IS NULL) AND (b BETWEEN 9 AND 10 OR b = 9)
ORDER BY pk LIMIT 1;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 SIMPLE t1 index a,b PRIMARY 4 NULL 73 100.00 Using where
+1 SIMPLE t1 index a,b PRIMARY 4 NULL 73 56.05 Using where
Warnings:
Note 1003 select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b` from `test`.`t1` where (`test`.`t1`.`a` between 9 and 10 or `test`.`t1`.`a` is null) and (`test`.`t1`.`b` between 9 and 10 or `test`.`t1`.`b` = 9) order by `test`.`t1`.`pk` limit 1
ANALYZE
@@ -2954,7 +2886,7 @@ SELECT * FROM t1
WHERE (a BETWEEN 9 AND 10 OR a IS NULL) AND (b BETWEEN 9 AND 10 OR b = 9)
ORDER BY pk LIMIT 1;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
-1 SIMPLE t1 index a,b PRIMARY 4 NULL 3008 3008.00 6.38 0.00 Using where
+1 SIMPLE t1 index a,b PRIMARY 4 NULL 3008 3008.00 1.36 0.00 Using where
DROP TABLE t1;
SET global innodb_stats_persistent= @stats.save;
#
@@ -3087,7 +3019,7 @@ fi.fh in (6311439873746261694,-397087483897438286,
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 100.00 Using intersect(acli_tp,acli_rid); Using where; Using index
1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where
-1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where
+1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where
Warnings:
Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and `test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774)
set statement optimizer_switch='rowid_filter=off' for select t.id, fi.*
@@ -3114,7 +3046,7 @@ fi.fh in (6311439873746261694,-397087483897438286,
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 100.00 Using intersect(acli_tp,acli_rid); Using where; Using index
1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where
-1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where
+1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where
Warnings:
Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and `test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774)
set statement optimizer_switch='rowid_filter=on' for select t.id, fi.*
@@ -3143,7 +3075,7 @@ fi.fh in (6311439873746261694,-397087483897438286,
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 100.00 Using intersect(acli_tp,acli_rid); Using where; Using index
1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where; Using join buffer (flat, BKA join); Rowid-ordered scan
-1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan
+1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan
Warnings:
Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and `test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774)
set statement optimizer_switch='rowid_filter=off' for select t.id, fi.*
@@ -3170,7 +3102,7 @@ fi.fh in (6311439873746261694,-397087483897438286,
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 100.00 Using intersect(acli_tp,acli_rid); Using where; Using index
1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where; Using join buffer (flat, BKA join); Rowid-ordered scan
-1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan
+1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan
Warnings:
Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and `test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774)
set statement optimizer_switch='rowid_filter=on' for select t.id, fi.*
@@ -3260,7 +3192,8 @@ ANALYZE
"join_type": "BKA",
"mrr_type": "Rowid-ordered scan",
"attached_condition": "a.atp = 1",
- "r_filtered": 100
+ "r_filtered": 100,
+ "r_unpack_time_ms": "REPLACED"
}
},
{
@@ -3278,7 +3211,7 @@ ANALYZE
"r_rows": 5,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
- "filtered": 17.1428566,
+ "filtered": 26.08628654,
"r_filtered": 100
},
"buffer_type": "incremental",
@@ -3286,7 +3219,8 @@ ANALYZE
"join_type": "BKA",
"mrr_type": "Rowid-ordered scan",
"attached_condition": "fi.fh in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774)",
- "r_filtered": 40
+ "r_filtered": 40,
+ "r_unpack_time_ms": "REPLACED"
}
}
]
@@ -3333,7 +3267,7 @@ WHERE t1.c1 NOT IN (SELECT t2.c1 FROM t2, t1 AS a1
WHERE t2.i1 = t1.pk AND t2.i1 IS NOT NULL);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 15 100.00 Using where
-2 DEPENDENT SUBQUERY t2 ref|filter c1,i1 c1|i1 3|5 func 6 (33%) 33.33 Using where; Full scan on NULL key; Using rowid filter
+2 DEPENDENT SUBQUERY t2 ref|filter c1,i1 c1|i1 3|5 func 6 (33%) 11.11 Using where; Full scan on NULL key; Using rowid filter
2 DEPENDENT SUBQUERY a1 ALL NULL NULL NULL NULL 15 100.00 Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t1.pk' of SELECT #2 was resolved in SELECT #1
diff --git a/mysql-test/main/show_explain_json.result b/mysql-test/main/show_explain_json.result
index daf8a3f5ef4..a5c441af5b8 100644
--- a/mysql-test/main/show_explain_json.result
+++ b/mysql-test/main/show_explain_json.result
@@ -51,13 +51,13 @@ SHOW EXPLAIN
{
"table": {
"table_name": "t1",
- "access_type": "index",
+ "access_type": "range",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
- "rows": 1000,
- "filtered": 99.90000153,
+ "rows": 999,
+ "filtered": 100,
"attached_condition": "t1.a < 100000",
"using_index": true
}
diff --git a/mysql-test/main/sp.result b/mysql-test/main/sp.result
index 37fdf62a6a9..0911a5276dd 100644
--- a/mysql-test/main/sp.result
+++ b/mysql-test/main/sp.result
@@ -2173,8 +2173,8 @@ create procedure bug3734 (param1 varchar(100))
select * from t3 where match (title,body) against (param1)|
call bug3734('database')|
id title body
-5 MySQL vs. YourSQL In the following database comparison ...
1 MySQL Tutorial DBMS stands for DataBase ...
+5 MySQL vs. YourSQL In the following database comparison ...
call bug3734('Security')|
id title body
6 MySQL Security When configured properly, MySQL ...
diff --git a/mysql-test/main/status.result b/mysql-test/main/status.result
index 78a39ee2ecf..ae64f370e45 100644
--- a/mysql-test/main/status.result
+++ b/mysql-test/main/status.result
@@ -71,10 +71,10 @@ a
6
show status like 'last_query_cost';
Variable_name Value
-Last_query_cost 13.542725
+Last_query_cost 0.017820
show status like 'last_query_cost';
Variable_name Value
-Last_query_cost 13.542725
+Last_query_cost 0.017820
select 1;
1
1
@@ -134,20 +134,20 @@ a
1
SHOW SESSION STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 1.501709
+Last_query_cost 0.010313
EXPLAIN SELECT a FROM t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
SHOW SESSION STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 1.501709
+Last_query_cost 0.010313
SELECT a FROM t1 UNION SELECT a FROM t1 ORDER BY a;
a
1
2
SHOW SESSION STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 0.000000
+Last_query_cost 0.010313
EXPLAIN SELECT a FROM t1 UNION SELECT a FROM t1 ORDER BY a;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2
@@ -155,25 +155,25 @@ id select_type table type possible_keys key key_len ref rows Extra
NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL Using filesort
SHOW SESSION STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 0.000000
+Last_query_cost 0.010313
SELECT a IN (SELECT a FROM t1) FROM t1 LIMIT 1;
a IN (SELECT a FROM t1)
1
SHOW SESSION STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 0.000000
+Last_query_cost 0.010313
SELECT (SELECT a FROM t1 LIMIT 1) x FROM t1 LIMIT 1;
x
1
SHOW SESSION STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 0.000000
+Last_query_cost 0.010313
SELECT * FROM t1 a, t1 b LIMIT 1;
a a
1 1
SHOW SESSION STATUS LIKE 'Last_query_cost';
Variable_name Value
-Last_query_cost 3.953418
+Last_query_cost 0.021119
DROP TABLE t1;
connect con1,localhost,root,,;
show status like 'com_show_status';
diff --git a/mysql-test/main/table_elim.result b/mysql-test/main/table_elim.result
index 8ae5522e8bc..a99afdef3ec 100644
--- a/mysql-test/main/table_elim.result
+++ b/mysql-test/main/table_elim.result
@@ -337,7 +337,7 @@ id select_type table type possible_keys key key_len ref rows Extra
explain select t1.a from t1 left join t2 on t2.pk between 0.5 and 1.5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
-1 SIMPLE t2 index PRIMARY PRIMARY 4 NULL 2 Using where; Using index
+1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 2 Using where; Using index
explain select t1.a from t1 left join t2 on t2.pk between 10 and 10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
@@ -408,7 +408,7 @@ select t1.*
from t1 left join t2 on t2.pk=3 or t2.pk= 4;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
-1 SIMPLE t2 index PRIMARY PRIMARY 4 NULL 2 Using where; Using index
+1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 2 Using where; Using index
explain
select t1.*
from t1 left join t2 on t2.pk=3 or t2.pk= 3;
@@ -419,7 +419,7 @@ select t1.*
from t1 left join t2 on (t2.pk=3 and t2.b=3) or (t2.pk= 4 and t2.b=3);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
-1 SIMPLE t2 ALL PRIMARY NULL NULL NULL 2 Using where
+1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 2 Using where
drop table t1, t2;
#
# LPBUG#523593: Running RQG optimizer_no_subquery crashes MariaDB
@@ -562,7 +562,10 @@ LEFT JOIN t1 ON t4.f1 = t1.f1
JOIN t5 ON t4.f3 ON t3.f1 = t5.f5 ON t2.f4 = t3.f4
WHERE t3.f2 ;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
+1 SIMPLE t3 ALL NULL NULL NULL NULL 2 Using where
+1 SIMPLE t5 ref f5 f5 5 test.t3.f1 2 Using where; Using index
+1 SIMPLE t4 ALL NULL NULL NULL NULL 3 Using where
+1 SIMPLE t2 ref f4 f4 1003 test.t3.f4 2 Using where
# ^^ The above must not produce a QEP of t3,t5,t2,t4
# as that violates the "no interleaving of outer join nests" rule.
DROP TABLE t1,t2,t3,t4,t5;
diff --git a/mysql-test/main/type_ranges.result b/mysql-test/main/type_ranges.result
index 02b6c79bdf5..012d1fc67ce 100644
--- a/mysql-test/main/type_ranges.result
+++ b/mysql-test/main/type_ranges.result
@@ -173,12 +173,12 @@ PRIMARY KEY (auto)
);
INSERT IGNORE INTO t2 (string,mediumblob_col,new_field) SELECT string,mediumblob_col,new_field from t1 where auto > 10;
Warnings:
+Warning 1265 Data truncated for column 'new_field' at row 1
Warning 1265 Data truncated for column 'new_field' at row 2
Warning 1265 Data truncated for column 'new_field' at row 3
Warning 1265 Data truncated for column 'new_field' at row 4
Warning 1265 Data truncated for column 'new_field' at row 5
Warning 1265 Data truncated for column 'new_field' at row 6
-Warning 1265 Data truncated for column 'new_field' at row 7
select * from t2;
auto string mediumblob_col new_field
1 2 2 ne
diff --git a/mysql-test/main/type_time_6065.result b/mysql-test/main/type_time_6065.result
index 75b272430c1..a9f64fd9870 100644
--- a/mysql-test/main/type_time_6065.result
+++ b/mysql-test/main/type_time_6065.result
@@ -2267,7 +2267,7 @@ outr.col_varchar_key IS NULL
);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY outr system col_datetime_key NULL NULL NULL 1 100.00
-1 PRIMARY innr ref col_int_key col_int_key 4 const 2 100.00 Using where; FirstMatch(outr)
+1 PRIMARY innr ref col_int_key col_int_key 4 const 2 50.00 Using where; FirstMatch(outr)
1 PRIMARY outr2 index col_time_key col_time_key 4 NULL 20 100.00 Using where; Using index; Using join buffer (flat, BNL join)
Warnings:
Note 1003 select 1 AS `col_int_nokey` from `test`.`t3` `outr2` semi join (`test`.`t1` `innr`) where `test`.`innr`.`col_int_key` = 1 and `test`.`innr`.`pk` >= `test`.`innr`.`col_int_nokey` and `test`.`outr2`.`col_time_key` > '2001-11-04 19:07:55'
diff --git a/mysql-test/main/user_var.result b/mysql-test/main/user_var.result
index 122a4233e24..924c252b951 100644
--- a/mysql-test/main/user_var.result
+++ b/mysql-test/main/user_var.result
@@ -22,7 +22,7 @@ i @vv1:=if(sv1.i,1,0) @vv2:=if(sv2.i,1,0) @vv3:=if(sv3.i,1,0) @vv1+@vv2+@vv3
2 1 0 0 1
explain select * from t1 where i=@vv1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL i NULL NULL NULL 3 Using where
+1 SIMPLE t1 ref i i 4 const 2
select @vv1,i,v from t1 where i=@vv1;
@vv1 i v
1 1 1
@@ -35,7 +35,7 @@ id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL i 4 NULL 3 Using where; Using index
explain select * from t1 where i=@vv1;
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE t1 ALL i NULL NULL NULL 3 Using where
+1 SIMPLE t1 ref i i 4 const 2
drop table t1,t2;
set @a=0,@b=0;
select @a:=10, @b:=1, @a > @b, @a < @b;
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 9f13ca7a424..0f78ea90bdd 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -86,6 +86,7 @@ my_bool my_getopt_prefix_matching= 1;
my_bool my_handle_options_init_variables = 1;
my_getopt_value my_getopt_get_addr= 0;
+my_getopt_adjust my_getopt_adjust_value= 0;
static void default_reporter(enum loglevel level, const char *format, ...)
{
@@ -897,7 +898,12 @@ static int setval(const struct my_option *opts, void *value, char *argument,
goto ret;
};
}
+
+ if (opts->var_type & GET_ADJUST_VALUE)
+ (*my_getopt_adjust_value)(opts, value);
+
validate_value(opts->name, argument, option_file);
+
DBUG_RETURN(0);
ret:
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 530a33193d3..24b68fedd94 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -174,7 +174,7 @@ SET (SQL_SOURCE
sql_tvc.cc sql_tvc.h
opt_split.cc
rowid_filter.cc rowid_filter.h
- optimizer_costs.h
+ optimizer_costs.h optimizer_defaults.h
opt_trace.cc
table_cache.cc encryption.cc temporary_tables.cc
json_table.cc
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 071108f7e91..96eabfdab89 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -1132,8 +1132,6 @@ write_keys(Sort_param *param, SORT_INFO *fs_info, uint count,
for (uint ix= 0; ix < count; ++ix)
{
uchar *record= fs_info->get_sorted_record(ix);
-
-
if (my_b_write(tempfile, record, param->get_record_length(record)))
DBUG_RETURN(1); /* purecov: inspected */
}
@@ -1678,7 +1676,7 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek,
num_bytes_read= bytes_to_read;
buffpek->init_current_key();
- buffpek->advance_file_position(num_bytes_read); /* New filepos */
+ buffpek->advance_file_position(num_bytes_read); /* New filepos */
buffpek->decrement_rowcount(count);
buffpek->set_mem_count(count);
return (ulong) num_bytes_read;
diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc
index 854033cc8d8..1aa17deb16e 100644
--- a/sql/filesort_utils.cc
+++ b/sql/filesort_utils.cc
@@ -19,7 +19,7 @@
#include "sql_const.h"
#include "sql_sort.h"
#include "table.h"
-
+#include "optimizer_defaults.h"
PSI_memory_key key_memory_Filesort_buffer_sort_keys;
@@ -58,7 +58,6 @@ const LEX_CSTRING filesort_names[]=
Cost of the operation.
*/
-static
double get_qsort_sort_cost(ha_rows num_rows, bool with_addon_fields)
{
const double row_copy_cost= with_addon_fields ? DEFAULT_ROW_COPY_COST :
@@ -106,12 +105,13 @@ double get_pq_sort_cost(size_t num_rows, size_t queue_size,
static
double get_merge_cost(ha_rows num_elements, ha_rows num_buffers,
- size_t elem_size, double compare_cost)
+ size_t elem_size, double compare_cost,
+ double disk_read_cost)
{
/* 2 -> 1 read + 1 write */
const double io_cost= (2.0 * (num_elements * elem_size +
DISK_CHUNK_SIZE - 1) /
- DISK_CHUNK_SIZE);
+ DISK_CHUNK_SIZE) * disk_read_cost;
/* 2 -> 1 insert, 1 pop for the priority queue used to merge the buffers. */
const double cpu_cost= (2.0 * num_elements * log2(1.0 + num_buffers) *
compare_cost) * PQ_SORT_SLOWNESS_CORRECTION_FACTOR;
@@ -131,6 +131,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows,
ha_rows num_keys_per_buffer,
size_t elem_size,
double key_compare_cost,
+ double disk_read_cost,
bool with_addon_fields)
{
DBUG_ASSERT(num_keys_per_buffer != 0);
@@ -162,7 +163,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows,
total_cost+=
num_merge_calls *
get_merge_cost(num_keys_per_buffer * MERGEBUFF, MERGEBUFF, elem_size,
- key_compare_cost);
+ key_compare_cost, disk_read_cost);
// # of records in remaining buffers.
last_n_elems+= num_remaining_buffs * num_keys_per_buffer;
@@ -170,7 +171,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows,
// Cost of merge sort of remaining buffers.
total_cost+=
get_merge_cost(last_n_elems, 1 + num_remaining_buffs, elem_size,
- key_compare_cost);
+ key_compare_cost, disk_read_cost);
num_buffers= num_merge_calls;
num_keys_per_buffer*= MERGEBUFF;
@@ -179,7 +180,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows,
// Simulate final merge_buff call.
last_n_elems+= num_keys_per_buffer * num_buffers;
total_cost+= get_merge_cost(last_n_elems, 1 + num_buffers, elem_size,
- key_compare_cost);
+ key_compare_cost, disk_read_cost);
return total_cost;
}
@@ -238,7 +239,7 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows,
{
costs[PQ_SORT_ORDER_BY_FIELDS]=
get_pq_sort_cost(num_rows, queue_size, false) +
- param->sort_form->file->ha_rnd_pos_time(MY_MIN(queue_size - 1, num_rows));
+ param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows));
}
/* Calculate cost with addon fields */
@@ -272,9 +273,10 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param,
costs[MERGE_SORT_ORDER_BY_FIELDS]=
get_merge_many_buffs_cost_fast(num_rows, num_available_keys,
row_length, DEFAULT_KEY_COMPARE_COST,
+ default_optimizer_costs.disk_read_cost,
false) +
- param->sort_form->file->ha_rnd_pos_time(MY_MIN(param->limit_rows,
- num_rows));
+ param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows,
+ num_rows));
if (with_addon_fields)
{
@@ -286,6 +288,7 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param,
costs[MERGE_SORT_ALL_FIELDS]=
get_merge_many_buffs_cost_fast(num_rows, num_available_keys,
row_length, DEFAULT_KEY_COMPARE_COST,
+ DISK_READ_COST_THD(thd),
true);
}
diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h
index b97fc4632c5..73aa2f76a18 100644
--- a/sql/filesort_utils.h
+++ b/sql/filesort_utils.h
@@ -352,6 +352,7 @@ extern const LEX_CSTRING filesort_names[];
double cost_of_filesort(TABLE *table, ORDER *order_by, ha_rows rows_to_read,
ha_rows limit_rows, enum sort_type *used_sort_type);
+double get_qsort_sort_cost(ha_rows num_rows, bool with_addon_fields);
int compare_packed_sort_keys(void *sort_keys, unsigned char **a,
unsigned char **b);
qsort2_cmp get_packed_keys_compare_ptr();
diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc
index 06388968948..f1d5fd7e4d3 100644
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@@ -9737,24 +9737,27 @@ uint ha_partition::get_biggest_used_partition(uint *part_index)
time for scan
*/
-double ha_partition::scan_time()
+IO_AND_CPU_COST ha_partition::scan_time()
{
- double scan_time= 0;
+ IO_AND_CPU_COST scan_time= {0,0};
uint i;
DBUG_ENTER("ha_partition::scan_time");
for (i= bitmap_get_first_set(&m_part_info->read_partitions);
i < m_tot_parts;
i= bitmap_get_next_set(&m_part_info->read_partitions, i))
- scan_time+= m_file[i]->scan_time();
+ {
+ IO_AND_CPU_COST cost= m_file[i]->scan_time();
+ scan_time.io+= cost.io;
+ scan_time.cpu+= cost.cpu;
+ }
if (m_tot_parts)
{
/*
Add TABLE_SCAN_SETUP_COST for partitions to make cost similar to
in ha_scan_time()
*/
- scan_time+= (TABLE_SCAN_SETUP_COST * avg_io_cost() * (m_tot_parts - 1) /
- optimizer_cache_cost);
+ scan_time.cpu+= TABLE_SCAN_SETUP_COST * (m_tot_parts - 1);
}
DBUG_RETURN(scan_time);
}
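
A quick standalone sanity check of the per-partition summation above; the per-partition costs and the TABLE_SCAN_SETUP_COST value are made-up numbers, not engine output:

// Toy model of summing IO_AND_CPU_COST over read partitions.
#include <cstdio>

struct IO_AND_CPU_COST { double io, cpu; };

int main()
{
  const double TABLE_SCAN_SETUP_COST = 0.01;             // assumed
  IO_AND_CPU_COST per_part[] = {{2.0, 0.5}, {2.0, 0.5}, {1.0, 0.25}};
  const unsigned parts = 3;

  IO_AND_CPU_COST total = {0, 0};
  for (unsigned i = 0; i < parts; i++)
  {
    total.io  += per_part[i].io;
    total.cpu += per_part[i].cpu;
  }
  /* Extra setup cost for every partition beyond the first, as in the patch */
  total.cpu += TABLE_SCAN_SETUP_COST * (parts - 1);
  printf("io=%.2f cpu=%.3f\n", total.io, total.cpu);     // io=5.00 cpu=1.270
  return 0;
}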
@@ -9769,34 +9772,78 @@ double ha_partition::scan_time()
@return time for scanning index inx
*/
-double ha_partition::key_scan_time(uint inx)
+IO_AND_CPU_COST ha_partition::key_scan_time(uint inx, ha_rows rows)
{
- double scan_time= 0;
+ IO_AND_CPU_COST scan_time= {0,0};
uint i;
+ uint partitions= bitmap_bits_set(&m_part_info->read_partitions);
+ ha_rows rows_per_part;
DBUG_ENTER("ha_partition::key_scan_time");
+
+ if (partitions == 0)
+ DBUG_RETURN(scan_time);
+ set_if_bigger(rows, 1);
+ rows_per_part= (rows + partitions - 1)/partitions;
+
for (i= bitmap_get_first_set(&m_part_info->read_partitions);
i < m_tot_parts;
i= bitmap_get_next_set(&m_part_info->read_partitions, i))
- scan_time+= m_file[i]->key_scan_time(inx);
+ {
+ IO_AND_CPU_COST cost= m_file[i]->key_scan_time(inx, rows_per_part);
+ scan_time.io+= cost.io;
+ scan_time.cpu+= cost.cpu;
+ }
DBUG_RETURN(scan_time);
}
-double ha_partition::keyread_time(uint inx, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_partition::keyread_time(uint inx, ulong ranges, ha_rows rows,
+ ulonglong blocks)
{
- double read_time= 0;
+ IO_AND_CPU_COST read_time= {0,0};
uint i;
+ uint partitions= bitmap_bits_set(&m_part_info->read_partitions);
DBUG_ENTER("ha_partition::keyread_time");
- if (!ranges)
- DBUG_RETURN(handler::keyread_time(inx, ranges, rows));
+ if (partitions == 0)
+ DBUG_RETURN(read_time);
+
+ ha_rows rows_per_part= (rows + partitions - 1)/partitions;
for (i= bitmap_get_first_set(&m_part_info->read_partitions);
i < m_tot_parts;
i= bitmap_get_next_set(&m_part_info->read_partitions, i))
- read_time+= m_file[i]->keyread_time(inx, ranges, rows);
+ {
+ IO_AND_CPU_COST cost= m_file[i]->keyread_time(inx, ranges, rows_per_part,
+ blocks);
+ read_time.io+= cost.io;
+ read_time.cpu+= cost.cpu;
+ }
+ /* Add that we have to do a key lookup for all ranges in all partitions */
+ read_time.cpu= (partitions-1) * ranges * KEY_LOOKUP_COST;
DBUG_RETURN(read_time);
}
+IO_AND_CPU_COST ha_partition::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST read_time= {0,0};
+ uint i;
+ uint partitions= bitmap_bits_set(&m_part_info->read_partitions);
+ if (partitions == 0)
+ return read_time;
+
+ ha_rows rows_per_part= (rows + partitions - 1)/partitions;
+ for (i= bitmap_get_first_set(&m_part_info->read_partitions);
+ i < m_tot_parts;
+ i= bitmap_get_next_set(&m_part_info->read_partitions, i))
+ {
+ IO_AND_CPU_COST cost= m_file[i]->rnd_pos_time(rows_per_part);
+ read_time.io+= cost.io;
+ read_time.cpu+= cost.cpu;
+ }
+ return read_time;
+}
+
+
/**
Find number of records in a range.
@param inx Index number
@@ -9853,6 +9900,8 @@ ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key,
if (estimated_rows && checked_rows &&
checked_rows >= min_rows_to_check)
{
+ /* We cannot use page ranges when there is more than one partition */
+ *pages= unused_page_range;
DBUG_PRINT("info",
("records_in_range(inx %u): %lu (%lu * %lu / %lu)",
inx,
@@ -9866,6 +9915,8 @@ ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key,
DBUG_PRINT("info", ("records_in_range(inx %u): %lu",
inx,
(ulong) estimated_rows));
+ /* We cannot use page ranges when there is more than one partition */
+ *pages= unused_page_range;
DBUG_RETURN(estimated_rows);
}
@@ -9896,33 +9947,6 @@ ha_rows ha_partition::estimate_rows_upper_bound()
}
-/*
- Get time to read
-
- SYNOPSIS
- read_time()
- index Index number used
- ranges Number of ranges
- rows Number of rows
-
- RETURN VALUE
- time for read
-
- DESCRIPTION
- This will be optimised later to include whether or not the index can
- be used with partitioning. To achieve we need to add another parameter
- that specifies how many of the index fields that are bound in the ranges.
- Possibly added as a new call to handlers.
-*/
-
-double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
-{
- DBUG_ENTER("ha_partition::read_time");
-
- DBUG_RETURN(get_open_file_sample()->read_time(index, ranges, rows));
-}
-
-
/**
Number of rows in table. see handler.h
@@ -12168,6 +12192,18 @@ ha_partition::can_convert_blob(const Field_blob* field,
return true;
}
+/*
+ Get table costs for the current statement that should be stored in
+ handler->cost variables.
+
+ When we want to support many different table handlers, we should set
+ m_file[i]->costs to point to an unique cost structure per open
+ instance and call something similar as
+ TABLE_SHARE::update_optimizer_costs(handlerton *hton) and
+ handler::update_optimizer_costs(&costs) on it.
+*/
+
+
void ha_partition::set_optimizer_costs(THD *thd)
{
handler::set_optimizer_costs(thd);
@@ -12177,6 +12213,17 @@ void ha_partition::set_optimizer_costs(THD *thd)
m_file[i]->set_optimizer_costs(thd);
}
+/*
+ Get unique table costs for the first instance of the handler and store
+ in table->share
+*/
+
+void ha_partition::update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ uint i= bitmap_get_first_set(&m_part_info->read_partitions);
+ m_file[i]->update_optimizer_costs(costs);
+}
+
struct st_mysql_storage_engine partition_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
diff --git a/sql/ha_partition.h b/sql/ha_partition.h
index 985d6d1eaec..9708b5fac86 100644
--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@@ -1031,17 +1031,15 @@ public:
/*
Called in test_quick_select to determine if indexes should be used.
*/
- double scan_time() override;
+ IO_AND_CPU_COST scan_time() override;
- double key_scan_time(uint inx) override;
+ IO_AND_CPU_COST key_scan_time(uint inx, ha_rows rows) override;
- double keyread_time(uint inx, uint ranges, ha_rows rows) override;
+ IO_AND_CPU_COST keyread_time(uint inx, ulong ranges, ha_rows rows,
+ ulonglong blocks) override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
/*
- The next method will never be called if you do not implement indexes.
- */
- double read_time(uint index, uint ranges, ha_rows rows) override;
- /*
For the given range how many records are estimated to be in this range.
Used by optimiser to calculate cost of using a particular index.
*/
@@ -1645,5 +1643,6 @@ public:
const Field_blob* field,
const Column_definition& new_field) const override;
void set_optimizer_costs(THD *thd);
+ void update_optimizer_costs(OPTIMIZER_COSTS *costs);
};
#endif /* HA_PARTITION_INCLUDED */
diff --git a/sql/handler.cc b/sql/handler.cc
index adb923eed0a..6179496a88e 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -46,6 +46,7 @@
#include "ha_sequence.h"
#include "rowid_filter.h"
#include "mysys_err.h"
+#include "optimizer_defaults.h"
#ifdef WITH_PARTITION_STORAGE_ENGINE
#include "ha_partition.h"
@@ -621,8 +622,44 @@ int ha_finalize_handlerton(st_plugin_int *plugin)
}
-const char *hton_no_exts[]= { 0 };
+/*
+ Get a pointer to the global engine optimizer costs (like
+ innodb.disk_read_cost) and store the pointer in the handlerton.
+
+ This is called once when a handlerton is created.
+ We also fill in any global costs that are not yet set with the default
+ costs, so that information_schema can print the values actually used.
+*/
+
+static bool update_optimizer_costs(handlerton *hton)
+{
+ OPTIMIZER_COSTS costs= default_optimizer_costs;
+ LEX_CSTRING *name= hton_name(hton);
+
+ if (hton->update_optimizer_costs)
+ hton->update_optimizer_costs(&costs);
+
+ mysql_mutex_lock(&LOCK_optimizer_costs);
+ hton->optimizer_costs= get_or_create_optimizer_costs(name->str,
+ name->length);
+ if (!hton->optimizer_costs)
+ {
+ mysql_mutex_unlock(&LOCK_optimizer_costs);
+ return 1; // OOM
+ }
+
+ /* Update not set values from current default costs */
+ for (uint i=0 ; i < sizeof(OPTIMIZER_COSTS)/sizeof(double) ; i++)
+ {
+ double *var= ((double*) hton->optimizer_costs)+i;
+ if (*var == OPTIMIZER_COST_UNDEF)
+ *var= ((double*) &costs)[i];
+ }
+ mysql_mutex_unlock(&LOCK_optimizer_costs);
+ return 0;
+}
+const char *hton_no_exts[]= { 0 };
int ha_initialize_handlerton(st_plugin_int *plugin)
{
@@ -725,6 +762,12 @@ int ha_initialize_handlerton(st_plugin_int *plugin)
hton->savepoint_offset= savepoint_alloc_size;
savepoint_alloc_size+= tmp;
hton2plugin[hton->slot]=plugin;
+
+ if (plugin->plugin->type == MYSQL_STORAGE_ENGINE_PLUGIN &&
+ !(hton->flags & HTON_HIDDEN) &&
+ update_optimizer_costs(hton))
+ goto err_deinit;
+
if (hton->prepare)
{
total_ha_2pc++;
@@ -764,7 +807,6 @@ int ha_initialize_handlerton(st_plugin_int *plugin)
resolve_sysvar_table_options(hton);
update_discovery_counters(hton, 1);
-
DBUG_RETURN(0);
err_deinit:
@@ -3222,58 +3264,97 @@ LEX_CSTRING *handler::engine_name()
return hton_name(ht);
}
-
/*
- It is assumed that the value of the parameter 'ranges' can be only 0 or 1.
- If ranges == 1 then the function returns the cost of index only scan
- by index 'keyno' of one range containing 'rows' key entries.
- If ranges == 0 then the function returns only the cost of copying
- those key entries into the engine buffers.
-
- This function doesn't take in account into copying the key to record
- (KEY_COPY_COST) or comparing the key to the where clause (WHERE_COST)
+ Calculate the cost of an index scan for a given index and number of records.
+
+ @param index Index to use
+ @param ranges Number of ranges (b-tree dives in case of b-tree).
+ Used by partition engine
+ @param rows Number of expected rows
+ @param blocks Number of disk blocks to read (from range optimizer).
+ 0 if not known
+
+ This function does not take into account looking up the key,
+ copying the key to the record or finding the next key. These costs are
+ handled in ha_keyread_time()
*/
-double handler::keyread_time(uint index, uint ranges, ha_rows rows)
+IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
{
- size_t len;
- double cost;
- DBUG_ASSERT(ranges == 0 || ranges == 1);
- len= table->key_info[index].key_length + ref_length;
- if (table->file->is_clustering_key(index))
- len= table->s->stored_rec_length;
+ IO_AND_CPU_COST cost;
+ ulonglong io_blocks= 0;
+ DBUG_ASSERT(ranges > 0);
- cost= ((double)rows*len/(stats.block_size+1) *
- INDEX_BLOCK_COPY_COST(table->in_use));
- /*
- We divide the cost with optimizer_cache_cost as ha_keyread_time()
- and ha_key_scan_time() will multiply the result value with
- optimizer_cache_cost and we want to keep the above 'memory operation'
- cost unaffected by this multiplication.
- */
- cost/= optimizer_cache_cost;
- if (ranges)
+ /* memory engine has stats.block_size == 0 */
+ if (stats.block_size)
{
- uint keys_per_block= (uint) (stats.block_size*3/4/len+1);
- /*
- We let the cost grow slowly in proportion to number of rows to
- promote indexes with less rows.
- We do not calculate exact number of block reads as then index
- only reads will be more costly than normal reads, especially
- compared to InnoDB clustered keys.
-
- KEY_LOOKUP_COST is the cost of finding the first key in the
- range. Finding the next key is usually a fast operation so we
- don't count it here, it is taken into account in
- ha_keyread_and_copy_time()
- */
- cost+= (((double) (rows / keys_per_block) + KEY_LOOKUP_COST) *
- avg_io_cost());
+ if (!blocks)
+ {
+ /* Estimate length of index data */
+ if (rows <= 1) // EQ_REF optimization
+ {
+ blocks= 1;
+ io_blocks= (stats.block_size + IO_SIZE - 1)/ IO_SIZE;
+ }
+ else
+ {
+ size_t len= table->key_storage_length(index);
+ blocks= ((ulonglong) ((rows * len / INDEX_BLOCK_FILL_FACTOR_DIV *
+ INDEX_BLOCK_FILL_FACTOR_MUL +
+ stats.block_size-1)) / stats.block_size +
+ (ranges - 1));
+ io_blocks= blocks * stats.block_size / IO_SIZE;
+ }
+ }
+ else
+ io_blocks= blocks * stats.block_size / IO_SIZE;
}
+ cost.io= (double) io_blocks * avg_io_cost();
+ cost.cpu= blocks * INDEX_BLOCK_COPY_COST;
return cost;
}
+/*
+ Cost of doing a set of range scans and finding the key position.
+ This function is used both with index scans (in which case there should be
+ an additional KEY_COPY_COST) and with a normal index + fetch-row scan,
+ in which case there should be an additional rnd_pos_time() cost.
+*/
+
+double handler::ha_keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+{
+ if (rows < ranges)
+ rows= ranges;
+ IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks);
+ return (cost.io * DISK_READ_RATIO +
+ cost.cpu + ranges * KEY_LOOKUP_COST +
+ (rows - ranges) * KEY_NEXT_FIND_COST);
+}
+
+
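+
+  The combination in ha_keyread_time() above (engine IO and CPU plus per-range
+  lookup and per-row next-find costs) can be sanity-checked with a small
+  standalone model; every constant below is an assumed value, not one of the
+  patch's defaults:
+
+  // Toy model of ha_keyread_time(): combine engine-reported IO/CPU with
+  // SQL-layer key costs. All numeric constants are illustrative assumptions.
+  // #include <cstdio>
+  //
+  // struct IO_AND_CPU_COST { double io, cpu; };
+  //
+  // int main()
+  // {
+  //   const double DISK_READ_RATIO    = 0.02;    // assumed share of uncached IO
+  //   const double KEY_LOOKUP_COST    = 0.0004;  // assumed cost per b-tree dive
+  //   const double KEY_NEXT_FIND_COST = 0.00008; // assumed cost per "next key"
+  //
+  //   double ranges = 10, rows = 1000;
+  //   IO_AND_CPU_COST engine = {5.0, 0.02};      // pretend keyread_time() result
+  //
+  //   double total = engine.io * DISK_READ_RATIO +
+  //                  engine.cpu +
+  //                  ranges * KEY_LOOKUP_COST +
+  //                  (rows - ranges) * KEY_NEXT_FIND_COST;
+  //   printf("ha_keyread_time ~ %.4f\n", total); // 0.1 + 0.02 + 0.004 + 0.0792
+  //   return 0;
+  // }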
+/*
+ Read a row from a clustered index
+
+ Cost is similar to ha_rnd_pos_call_time() as an index_read() on a clustered
+ key has identical code to rnd_pos() (at least in InnoDB).
+*/
+
+double handler::ha_keyread_clustered_and_copy_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks)
+{
+ if (rows < ranges)
+ rows= ranges;
+ IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks);
+ return (cost.io * DISK_READ_RATIO +
+ cost.cpu + ranges * ROW_LOOKUP_COST +
+ (rows - ranges) * ROW_NEXT_FIND_COST +
+ rows * ROW_COPY_COST);
+}
+
THD *handler::ha_thd(void) const
{
DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
@@ -3346,7 +3427,7 @@ int handler::ha_open(TABLE *table_arg, const char *name, int mode,
name, ht->db_type, table_arg->db_stat, mode,
test_if_locked));
- table= table_arg;
+ set_table(table_arg);
DBUG_ASSERT(table->s == table_share);
DBUG_ASSERT(m_lock_type == F_UNLCK);
DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK));
@@ -3396,14 +3477,15 @@ int handler::ha_open(TABLE *table_arg, const char *name, int mode,
else
dup_ref=ref+ALIGN_SIZE(ref_length);
cached_table_flags= table_flags();
-
+ if (!table->s->optimizer_costs_inited)
+ {
+ table->s->optimizer_costs_inited=1;
+ /* Copy data from global 'engine'.optimizer_costs to TABLE_SHARE */
+ table->s->update_optimizer_costs(partition_ht());
+ /* Update costs depend on table structure */
+ update_optimizer_costs(&table->s->optimizer_costs);
+ }
/* Copy current optimizer costs. Needed in case clone() is used */
- set_optimizer_costs(table->in_use);
- DBUG_ASSERT(optimizer_key_copy_cost >= 0.0);
- DBUG_ASSERT(optimizer_key_next_find_cost >= 0.0);
- DBUG_ASSERT(optimizer_row_copy_cost >= 0.0);
- DBUG_ASSERT(optimizer_where_cost >= 0.0);
- DBUG_ASSERT(optimizer_key_cmp_cost >= 0.0);
reset_statistics();
}
internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE);
@@ -3435,6 +3517,15 @@ int handler::ha_close(void)
DBUG_RETURN(close());
}
+void handler::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
+{
+ DBUG_ASSERT(table_arg->s == share);
+ table= table_arg;
+ table_share= share;
+ costs= &share->optimizer_costs;
+ reset_statistics();
+}
+
int handler::ha_rnd_next(uchar *buf)
{
@@ -8744,27 +8835,19 @@ Table_scope_and_contents_source_st::fix_period_fields(THD *thd,
}
/*
- Copy common optimizer cost variables to the engine
-
- This is needed to provide fast acccess to these variables during
- optimization (as we refer to them multiple times).
+ Copy upper-level costs to the engine as part of starting a statement
- The other option would be to access them from thd, but that
- would require a function call (as we cannot access THD from
- an inline handler function) and two extra memory accesses
- for each variable.
+ This is needed to provide fast access to these variables during
+ optimization (as we refer to them multiple times during one query).
- index_block_copy_cost is not copied as it is used so seldom.
+ The other option would be to access them from THD, but that would
+ require a function call (as we cannot easily access THD from an
+ inline handler function) and two extra memory accesses for each
+ variable.
*/
-
void handler::set_optimizer_costs(THD *thd)
{
- optimizer_key_copy_cost= thd->variables.optimizer_key_copy_cost;
- optimizer_key_next_find_cost=
- thd->variables.optimizer_key_next_find_cost;
- optimizer_row_copy_cost= thd->variables.optimizer_row_copy_cost;
- optimizer_where_cost= thd->variables.optimizer_where_cost;
- optimizer_key_cmp_cost= thd->variables.optimizer_key_cmp_cost;
- set_optimizer_cache_cost(thd->optimizer_cache_hit_ratio);
+ optimizer_where_cost= thd->variables.optimizer_where_cost;
+ optimizer_scan_setup_cost= thd->variables.optimizer_scan_setup_cost;
}
diff --git a/sql/handler.h b/sql/handler.h
index a5d4248653a..5cdf55a9641 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -26,9 +26,9 @@
#endif
#include "sql_const.h"
-#include "optimizer_costs.h"
#include "sql_basic_types.h"
#include "mysqld.h" /* server_id */
+#include "optimizer_costs.h"
#include "sql_plugin.h" /* plugin_ref, st_plugin_int, plugin */
#include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA */
#include "sql_cache.h"
@@ -36,6 +36,7 @@
#include "sql_array.h" /* Dynamic_array<> */
#include "mdl.h"
#include "vers_string.h"
+#include "optimizer_costs.h"
#include "sql_analyze_stmt.h" // for Exec_time_tracker
@@ -1046,6 +1047,7 @@ enum enum_schema_tables
SCH_KEY_CACHES,
SCH_KEY_COLUMN_USAGE,
SCH_OPEN_TABLES,
+ SCH_OPTIMIZER_COSTS,
SCH_OPT_TRACE,
SCH_PARAMETERS,
SCH_PARTITIONS,
@@ -1496,6 +1498,10 @@ struct handlerton
/* Called for all storage handlers after ddl recovery is done */
void (*signal_ddl_recovery_done)(handlerton *hton);
+ /* Called at startup to update default engine costs */
+ void (*update_optimizer_costs)(OPTIMIZER_COSTS *costs);
+ void *optimizer_costs; /* Costs are stored here */
+
/*
Optional clauses in the CREATE/ALTER TABLE
*/
@@ -3080,6 +3086,21 @@ enum class Compare_keys : uint32_t
NotEqual
};
+/* Cost for reading a row through an index */
+struct INDEX_READ_COST
+{
+ double read_cost;
+ double index_only_cost;
+};
+
+/* Separated costs for IO and CPU. For handler::keyread_time() */
+struct IO_AND_CPU_COST
+{
+ double io;
+ double cpu;
+};
+
+
/**
The handler class is the interface for dynamically loadable
storage engines. Do not add ifdefs and take care when adding or
@@ -3140,9 +3161,10 @@ protected:
ha_rows estimation_rows_to_insert;
handler *lookup_handler;
public:
- handlerton *ht; /* storage engine of this handler */
- uchar *ref; /* Pointer to current row */
- uchar *dup_ref; /* Pointer to duplicate row */
+ handlerton *ht; /* storage engine of this handler */
+ OPTIMIZER_COSTS *costs; /* Points to table->share->costs */
+ uchar *ref; /* Pointer to current row */
+ uchar *dup_ref; /* Pointer to duplicate row */
uchar *lookup_buffer;
ha_statistics stats;
@@ -3215,15 +3237,6 @@ public:
ulonglong rows_changed;
/* One bigger than needed to avoid to test if key == MAX_KEY */
ulonglong index_rows_read[MAX_KEY+1];
- /*
- Cost of using key/record cache: (100-cache_hit_ratio)/100
- Updated from THD in open_tables()
- */
- double optimizer_cache_cost;
- double optimizer_key_next_find_cost;
- double optimizer_row_copy_cost, optimizer_key_copy_cost;
- double optimizer_where_cost, optimizer_key_cmp_cost;
-
ha_copy_info copy_info;
private:
@@ -3342,13 +3355,15 @@ private:
For non partitioned handlers this is &TABLE_SHARE::ha_share.
*/
Handler_share **ha_share;
+ double optimizer_where_cost; // Copy of THD->...optimizer_where_cost
+ double optimizer_scan_setup_cost; // Copy of THD->...optimizer_scan_...
public:
handler(handlerton *ht_arg, TABLE_SHARE *share_arg)
:table_share(share_arg), table(0),
estimation_rows_to_insert(0),
lookup_handler(this),
- ht(ht_arg), ref(0), lookup_buffer(NULL), end_range(NULL),
+ ht(ht_arg), costs(0), ref(0), lookup_buffer(NULL), end_range(NULL),
implicit_emptied(0),
mark_trx_read_write_done(0),
check_table_binlog_row_based_done(0),
@@ -3359,7 +3374,6 @@ public:
ref_length(sizeof(my_off_t)),
ft_handler(0), inited(NONE), pre_inited(NONE),
pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0),
- optimizer_cache_cost((100-DEFAULT_CACHE_HIT_RATIO)/100.0),
tracker(NULL),
pushed_idx_cond(NULL),
pushed_idx_cond_keyno(MAX_KEY),
@@ -3373,12 +3387,19 @@ public:
m_psi_numrows(0),
m_psi_locker(NULL),
row_logging(0), row_logging_init(0),
- m_lock_type(F_UNLCK), ha_share(NULL)
+ m_lock_type(F_UNLCK), ha_share(NULL), optimizer_where_cost(0),
+ optimizer_scan_setup_cost(0)
{
DBUG_PRINT("info",
("handler created F_UNLCK %d F_RDLCK %d F_WRLCK %d",
F_UNLCK, F_RDLCK, F_WRLCK));
reset_statistics();
+ /*
+ The following variables should be updated in set_optimizer_costs()
+ which is to be run as part of setting up the table for the query
+ */
+ MEM_UNDEFINED(&optimizer_where_cost, sizeof(optimizer_where_cost));
+ MEM_UNDEFINED(&optimizer_scan_setup_cost, sizeof(optimizer_scan_setup_cost));
}
virtual ~handler(void)
{
@@ -3579,22 +3600,22 @@ public:
bzero(&copy_info, sizeof(copy_info));
reset_copy_info();
}
- virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
- {
- table= table_arg;
- table_share= share;
- reset_statistics();
- }
+ virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share);
/*
Time for a full table data scan. To be overrided by engines, should not
be used by the sql level.
*/
protected:
- virtual double scan_time()
+ virtual IO_AND_CPU_COST scan_time()
{
- return (((ulonglong2double(stats.data_file_length) / stats.block_size)) *
- avg_io_cost());
+ IO_AND_CPU_COST cost;
+ ulonglong length= stats.data_file_length;
+ cost.io= (double) (length / IO_SIZE) * avg_io_cost();
+ cost.cpu= (!stats.block_size ? 0.0 :
+ (double) ((length + stats.block_size-1)/stats.block_size) *
+ INDEX_BLOCK_COPY_COST);
+ return cost;
}
public:
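
A standalone illustration of the default scan_time() split shown above; IO_SIZE, block size and the per-block costs are assumed values, not the patch's defaults:

// Toy model of handler::scan_time()'s IO/CPU split. Constants are assumptions.
#include <cstdio>

int main()
{
  const double IO_SIZE               = 4096.0;            // assumed IO unit
  const double block_size            = 16384.0;           // assumed stats.block_size
  const double avg_io_cost           = 0.01;              // assumed cost per IO_SIZE read
  const double INDEX_BLOCK_COPY_COST = 0.000035;          // assumed per-block copy cost
  const double data_file_length      = 100 * 1024 * 1024; // 100MB table

  double io  = data_file_length / IO_SIZE * avg_io_cost;
  double cpu = (data_file_length + block_size - 1) / block_size *
               INDEX_BLOCK_COPY_COST;
  printf("scan_time: io=%.1f cpu=%.3f\n", io, cpu);       // io=256.0 cpu=0.224
  return 0;
}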
@@ -3610,147 +3631,149 @@ public:
a few rows and the extra cost has no practical effect.
*/
- inline double ha_scan_time()
+ inline double ha_scan_time(ha_rows rows)
{
- return (scan_time() * optimizer_cache_cost +
- TABLE_SCAN_SETUP_COST * avg_io_cost());
+ IO_AND_CPU_COST cost= scan_time();
+ return (cost.io * DISK_READ_RATIO +
+ cost.cpu + TABLE_SCAN_SETUP_COST +
+ (double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST));
}
/*
- Time for a full table scan, fetching the rows from the table and comparing
- the row with the where clause
+ Time for a full table scan, fetching the rows from the table and comparing
+ the row with the where clause
*/
- inline double ha_scan_and_compare_time(ha_rows records)
+ inline double ha_scan_and_compare_time(ha_rows rows)
{
- return (ha_scan_time() +
- (double) records * (ROW_COPY_COST + WHERE_COST));
+ return ha_scan_time(rows) + (double) rows * WHERE_COST;
}
+ /* Cost of (random) reading a block of IO_SIZE */
virtual double avg_io_cost()
{
- return 1.0;
+ return DISK_READ_COST;
}
- virtual void set_optimizer_costs(THD *thd);
-
/*
- Set cost for finding a row in the engine cache
- This allows the handler to override the cost if there is no
- caching of rows, like in heap or federatedx.
+ Update table->share optimizer costs for this particular table.
+ Called once, when the table is opened for the first time.
*/
- virtual void set_optimizer_cache_cost(double cost)
- {
- optimizer_cache_cost= cost;
- }
-
- /**
- The cost of reading a set of ranges from the table using an index
- to access it.
-
- @param index The index number.
- @param ranges The number of ranges to be read. If 0, it means that
- we calculate separately the cost of reading the key.
- @param rows Total number of rows to be read.
-
- This method can be used to calculate the total cost of scanning a table
- using an index by calling it using read_time(index, 1, table_size).
+ virtual void update_optimizer_costs(OPTIMIZER_COSTS *costs) {}
- This function is to be reimplemented by engines (if needed). The sql_level
- should call ha_read_time(), ha_read_and_copy_time() or
- ha_read_and_compare_time().
+ /*
+ Set handler optimizer cost variables.
+ Called for each table used by the statement.
+ This is virtual mainly for the partition engine.
*/
+ virtual void set_optimizer_costs(THD *thd);
+
protected:
- virtual double read_time(uint index, uint ranges, ha_rows rows)
+ /*
+ Cost of reading 'rows' number of rows with a rowid
+ */
+ virtual IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{
- return ((rows2double(rows) * ROW_LOOKUP_COST +
- rows2double(ranges) * KEY_LOOKUP_COST) * avg_io_cost());
+ double r= rows2double(rows);
+ return
+ {
+ r * avg_io_cost() * stats.block_size/IO_SIZE, // Blocks read
+ r * INDEX_BLOCK_COPY_COST // Copy block from cache
+ };
}
public:
- /* Same as above, but take into account CACHE_COST */
- inline double ha_read_time(uint index, uint ranges, ha_rows rows)
- {
- return read_time(index, ranges, rows) * optimizer_cache_cost;
- }
+ /*
+ Time for doing an internal rnd_pos() inside the engine. For some
+ engines, this is more efficient than the SQL layer calling
+ rnd_pos() as there is no overhead in converting/checking the
+ rnd_pos_value. This is used when calculating the cost of fetching
+ a key+row in one go (like when scanning an index and fetching the
+ row).
+ */
- /* Same as above, but take into account also copying of the row to 'record' */
- inline double ha_read_and_copy_time(uint index, uint ranges, ha_rows rows)
+ inline double ha_rnd_pos_time(ha_rows rows)
{
- return (ha_read_time(index, ranges, rows) +
- rows2double(rows) * ROW_COPY_COST);
+ IO_AND_CPU_COST cost= rnd_pos_time(rows);
+ return (cost.io * DISK_READ_RATIO +
+ cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST));
}
- /* Same as above, but take into account also copying and comparing the row */
- inline double ha_read_and_compare_time(uint index, uint ranges, ha_rows rows)
+ /*
+ This cost is for when we call rnd_pos() explicitly.
+ For the moment this function is identical to ha_rnd_pos_time(),
+ but that may change in the future after we do more cost checks for
+ more engines.
+ */
+ inline double ha_rnd_pos_call_time(ha_rows rows)
{
- return (ha_read_time(index, ranges, rows) +
- rows2double(rows) * (ROW_COPY_COST + WHERE_COST));
+ IO_AND_CPU_COST cost= rnd_pos_time(rows);
+ return (cost.io * DISK_READ_RATIO +
+ cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST));
}
- /* Cost of reading a row with rowid */
-protected:
- virtual double rnd_pos_time(ha_rows rows)
+ inline double ha_rnd_pos_call_and_compare_time(ha_rows rows)
{
- return rows2double(rows) * ROW_LOOKUP_COST * avg_io_cost();
- }
-public:
- /*
- Same as above, but take into account cache_cost and copying of the row
- to 'record'.
- Note that this should normally be same as ha_read_time(some_key, 0, rows)
- */
- inline double ha_rnd_pos_time(ha_rows rows)
- {
- return (rnd_pos_time(rows) * optimizer_cache_cost +
- rows2double(rows) * ROW_COPY_COST);
+ return (ha_rnd_pos_call_time(rows) + rows2double(rows) * WHERE_COST);
}
/**
- Calculate cost of 'index_only' scan for given index and number of records.
-
- @param index Index to read
- @param flag If flag == 1 then the function returns the cost of
- index only scan by index 'index' of one range containing
- 'rows' key entries.
- If flag == 0 then function returns only the cost of copying
- those key entries into the engine buffers.
- @param rows #of records to read
+ Calculate cost of an 'index_only' scan for a given index, a number of ranges
+ and number of records.
+
+ @param index Index to read
+ @param rows #of records to read
+ @param blocks Number of IO blocks that need to be accessed.
+ 0 if not known (in which case it's calculated)
*/
protected:
- virtual double keyread_time(uint index, uint flag, ha_rows rows);
+ virtual IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks);
public:
/*
Calculate cost of 'keyread' scan for given index and number of records
including fetching the key to the 'record' buffer.
*/
+ double ha_keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks);
- inline double ha_keyread_time(uint index, uint flag, ha_rows rows)
+ /* Same as above, but take into account copying the key to the SQL layer */
+ inline double ha_keyread_and_copy_time(uint index, ulong ranges,
+ ha_rows rows, ulonglong blocks)
{
- return (keyread_time(index, flag, rows) * optimizer_cache_cost);
+ return (ha_keyread_time(index, ranges, rows, blocks) +
+ (double) rows * KEY_COPY_COST);
}
- /* Same as above, but take into account copying the key the the SQL layer */
- inline double ha_keyread_and_copy_time(uint index, uint flag, ha_rows rows)
+ inline double ha_keyread_and_compare_time(uint index, ulong ranges,
+ ha_rows rows, ulonglong blocks)
{
- return ha_keyread_time(index, flag, rows) + (double) rows * KEY_COPY_COST;
+ return (ha_keyread_time(index, ranges, rows, blocks) +
+ (double) rows * (KEY_COPY_COST + WHERE_COST));
}
+ double ha_keyread_clustered_and_copy_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks);
/*
Time for a full table index scan (without copy or compare cost).
To be overrided by engines, sql level should use ha_key_scan_time().
+ Note that IO_AND_CPU_COST does not include avg_io_cost() !
*/
protected:
- virtual double key_scan_time(uint index)
+ virtual IO_AND_CPU_COST key_scan_time(uint index, ha_rows rows)
{
- return keyread_time(index, 1, records());
+ return keyread_time(index, 1, MY_MAX(rows, 1), 0);
}
public:
/* Cost of doing a full index scan */
- inline double ha_key_scan_time(uint index)
+ inline double ha_key_scan_time(uint index, ha_rows rows)
{
- return (key_scan_time(index) * optimizer_cache_cost);
+ IO_AND_CPU_COST cost= key_scan_time(index, rows);
+ return (cost.io * DISK_READ_RATIO +
+ cost.cpu + INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST +
+ (double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST));
}
/*
@@ -3759,8 +3782,7 @@ public:
*/
inline double ha_key_scan_and_compare_time(uint index, ha_rows rows)
{
- return (ha_key_scan_time(index) +
- (double) rows * (KEY_COPY_COST + WHERE_COST));
+ return ha_key_scan_time(index, rows) + (double) rows * WHERE_COST;
}
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
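
To see how the refactored ha_*_time() helpers let the optimizer price one access path against another, the following self-contained sketch compares a full table scan with a full index scan for a hypothetical table; all cost constants and the engine-side {io, cpu} pairs are assumptions chosen for illustration:

// Toy comparison of ha_scan_and_compare_time() vs ha_key_scan_and_compare_time().
#include <cstdio>

struct IO_AND_CPU_COST { double io, cpu; };

int main()
{
  const double DISK_READ_RATIO       = 0.02;     // assumed
  const double TABLE_SCAN_SETUP_COST = 0.01;     // assumed
  const double INDEX_SCAN_SETUP_COST = 0.01;     // assumed
  const double KEY_LOOKUP_COST       = 0.0004;   // assumed
  const double ROW_NEXT_FIND_COST    = 0.00005, ROW_COPY_COST = 0.00006;
  const double KEY_NEXT_FIND_COST    = 0.00008, KEY_COPY_COST = 0.000015;
  const double WHERE_COST            = 0.00003;
  const double rows = 100000;

  IO_AND_CPU_COST table_scan = {256.0, 0.22};    // pretend scan_time() result
  IO_AND_CPU_COST key_scan   = { 25.0, 0.02};    // pretend key_scan_time() result

  /* Mirrors ha_scan_and_compare_time(): scan + per-row find/copy/where costs */
  double full_scan = table_scan.io * DISK_READ_RATIO + table_scan.cpu +
                     TABLE_SCAN_SETUP_COST +
                     rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST + WHERE_COST);
  /* Mirrors ha_key_scan_and_compare_time(): index scan + per-key costs */
  double index_scan = key_scan.io * DISK_READ_RATIO + key_scan.cpu +
                      INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST +
                      rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST + WHERE_COST);
  printf("table scan=%.2f  index scan=%.2f\n", full_scan, index_scan);
  return 0;
}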
@@ -5213,7 +5235,7 @@ public:
ha_share= arg_ha_share;
return false;
}
- void set_table(TABLE* table_arg) { table= table_arg; }
+ inline void set_table(TABLE* table_arg);
int get_lock_type() const { return m_lock_type; }
public:
/* XXX to be removed, see ha_partition::partition_ht() */
@@ -5297,7 +5319,7 @@ protected:
void unlock_shared_ha_data();
/*
- Mroonga needs to call read_time() directly for it's internal handler
+ Mroonga needs to call some xxx_time() directly for its internal handler
methods
*/
friend class ha_mroonga;
diff --git a/sql/item_func.cc b/sql/item_func.cc
index a07595cbbd8..2f110406a6d 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -5920,6 +5920,7 @@ bool Item_func_get_system_var::fix_length_and_dec(THD *thd)
decimals=0;
break;
case SHOW_DOUBLE:
+ case SHOW_OPTIMIZER_COST:
decimals= 6;
collation= DTCollation_numeric();
fix_char_length(DBL_DIG + 6);
@@ -5977,6 +5978,7 @@ const Type_handler *Item_func_get_system_var::type_handler() const
case SHOW_CHAR_PTR:
case SHOW_LEX_STRING:
return &type_handler_varchar;
+ case SHOW_OPTIMIZER_COST:
case SHOW_DOUBLE:
return &type_handler_double;
default:
diff --git a/sql/json_table.cc b/sql/json_table.cc
index 05ee83bd3d8..949175d8027 100644
--- a/sql/json_table.cc
+++ b/sql/json_table.cc
@@ -54,6 +54,7 @@ public:
bzero(&m_hton, sizeof(m_hton));
m_hton.tablefile_extensions= hton_no_exts;
m_hton.slot= HA_SLOT_UNDEF;
+ m_hton.flags= HTON_HIDDEN;
}
};
@@ -245,6 +246,10 @@ public:
int open(const char *name, int mode, uint test_if_locked) override
{ return 0; }
int close(void) override { return 0; }
+ void update_optimizer_costs(OPTIMIZER_COSTS *costs)
+ {
+ memcpy(costs, &heap_optimizer_costs, sizeof(*costs));
+ }
int rnd_init(bool scan) override;
int rnd_next(uchar *buf) override;
int rnd_pos(uchar * buf, uchar *pos) override;
diff --git a/sql/keycaches.cc b/sql/keycaches.cc
index 10bec7c1de8..250a287e229 100644
--- a/sql/keycaches.cc
+++ b/sql/keycaches.cc
@@ -15,6 +15,10 @@
#include "mariadb.h"
#include "keycaches.h"
+#include "optimizer_costs.h"
+#include "optimizer_defaults.h"
+#include "handler.h"
+#include "sql_class.h"
/****************************************************************************
Named list handling
@@ -22,10 +26,13 @@
NAMED_ILIST key_caches;
NAMED_ILIST rpl_filters;
+NAMED_ILIST linked_optimizer_costs;
extern "C" PSI_memory_key key_memory_KEY_CACHE;
extern PSI_memory_key key_memory_NAMED_ILINK_name;
+LEX_CSTRING default_base= {STRING_WITH_LEN("default")};
+
/**
ilink (intrusive list element) with a name
*/
@@ -46,7 +53,7 @@ public:
}
inline bool cmp(const char *name_cmp, size_t length)
{
- return length == name_length && !memcmp(name, name_cmp, length);
+ return !system_charset_info->strnncoll(name, name_length, name_cmp, length);
}
~NAMED_ILINK()
{
@@ -72,7 +79,8 @@ uchar* find_named(I_List<NAMED_ILINK> *list, const char *name, size_t length,
}
-bool NAMED_ILIST::delete_element(const char *name, size_t length, void (*free_element)(const char *name, void*))
+bool NAMED_ILIST::delete_element(const char *name, size_t length,
+ void (*free_element)(const char *name, void*))
{
I_List_iterator<NAMED_ILINK> it(*this);
NAMED_ILINK *element;
@@ -104,14 +112,12 @@ void NAMED_ILIST::delete_elements(void (*free_element)(const char *name, void*))
/* Key cache functions */
-LEX_CSTRING default_key_cache_base= {STRING_WITH_LEN("default")};
-
KEY_CACHE zero_key_cache; ///< @@nonexistent_cache.param->value_ptr() points here
KEY_CACHE *get_key_cache(const LEX_CSTRING *cache_name)
{
if (!cache_name || ! cache_name->length)
- cache_name= &default_key_cache_base;
+ cache_name= &default_base;
return ((KEY_CACHE*) find_named(&key_caches,
cache_name->str, cache_name->length, 0));
}
@@ -234,3 +240,128 @@ void free_all_rpl_filters()
{
rpl_filters.delete_elements(free_rpl_filter);
}
+
+
+/******************************************************************************
+ Optimizer costs functions
+******************************************************************************/
+
+LEX_CSTRING default_costs_base= {STRING_WITH_LEN("default")};
+
+OPTIMIZER_COSTS default_optimizer_costs=
+{
+ DEFAULT_DISK_READ_COST, // disk_read_cost
+ DEFAULT_INDEX_BLOCK_COPY_COST, // index_block_copy_cost
+ DEFAULT_WHERE_COST/4, // key_cmp_cost
+ DEFAULT_KEY_COPY_COST, // key_copy_cost
+ DEFAULT_KEY_LOOKUP_COST, // key_lookup_cost
+ DEFAULT_KEY_NEXT_FIND_COST, // key_next_find_cost
+ DEFAULT_DISK_READ_RATIO, // disk_read_ratio
+ DEFAULT_ROW_COPY_COST, // row_copy_cost
+ DEFAULT_ROW_LOOKUP_COST, // row_lookup_cost
+ DEFAULT_ROW_NEXT_FIND_COST, // row_next_find_cost
+ DEFAULT_ROWID_COMPARE_COST, // rowid_compare_cost
+ DEFAULT_ROWID_COPY_COST, // rowid_copy_cost
+ 1 // Cannot be deleted
+};
+
+OPTIMIZER_COSTS heap_optimizer_costs, tmp_table_optimizer_costs;
+
+OPTIMIZER_COSTS *get_optimizer_costs(const LEX_CSTRING *cache_name)
+{
+ if (!cache_name->length)
+ return &default_optimizer_costs;
+ return ((OPTIMIZER_COSTS*) find_named(&linked_optimizer_costs,
+ cache_name->str, cache_name->length,
+ 0));
+}
+
+OPTIMIZER_COSTS *create_optimizer_costs(const char *name, size_t length)
+{
+ OPTIMIZER_COSTS *optimizer_costs;
+ DBUG_ENTER("create_optimizer_costs");
+ DBUG_PRINT("enter",("name: %.*s", (int) length, name));
+
+ if ((optimizer_costs= (OPTIMIZER_COSTS*)
+ my_malloc(key_memory_KEY_CACHE,
+ sizeof(OPTIMIZER_COSTS), MYF(MY_ZEROFILL | MY_WME))))
+ {
+ if (!new NAMED_ILINK(&linked_optimizer_costs, name, length,
+ (uchar*) optimizer_costs))
+ {
+ my_free(optimizer_costs);
+ optimizer_costs= 0;
+ }
+ else
+ {
+ /* Mark that values are not yet set */
+ for (uint i=0 ; i < sizeof(OPTIMIZER_COSTS)/sizeof(double) ; i++)
+ ((double*) optimizer_costs)[i]= OPTIMIZER_COST_UNDEF;
+ }
+ }
+ DBUG_RETURN(optimizer_costs);
+}
+
+
+OPTIMIZER_COSTS *get_or_create_optimizer_costs(const char *name, size_t length)
+{
+ LEX_CSTRING optimizer_costs_name;
+ OPTIMIZER_COSTS *optimizer_costs;
+
+ optimizer_costs_name.str= name;
+ optimizer_costs_name.length= length;
+ if (!(optimizer_costs= get_optimizer_costs(&optimizer_costs_name)))
+ optimizer_costs= create_optimizer_costs(name, length);
+ return optimizer_costs;
+}
+
+extern "C"
+{
+bool process_optimizer_costs(process_optimizer_costs_t func, TABLE *param)
+{
+ I_List_iterator<NAMED_ILINK> it(linked_optimizer_costs);
+ NAMED_ILINK *element;
+ int res= 0;
+
+ while ((element= it++))
+ {
+ LEX_CSTRING name= { element->name, element->name_length };
+ OPTIMIZER_COSTS *costs= (OPTIMIZER_COSTS *) element->data;
+ res |= func(&name, costs, param);
+ }
+ return res != 0;
+}
+}
+
+bool create_default_optimizer_costs()
+{
+ return (new NAMED_ILINK(&linked_optimizer_costs,
+ default_base.str, default_base.length,
+ (uchar*) &default_optimizer_costs)) == 0;
+}
+
+
+/*
+ Make a copy of heap and tmp_table engine costs to be able to create
+ internal temporary tables without taking a mutex.
+*/
+
+void copy_tmptable_optimizer_costs()
+{
+ memcpy(&heap_optimizer_costs, heap_hton->optimizer_costs,
+ sizeof(heap_optimizer_costs));
+ memcpy(&tmp_table_optimizer_costs, TMP_ENGINE_HTON->optimizer_costs,
+ sizeof(tmp_table_optimizer_costs));
+}
+
+
+static void free_optimizer_costs(const char *name, void *cost)
+{
+ if ((OPTIMIZER_COSTS*) cost != &default_optimizer_costs)
+ my_free(cost);
+}
+
+void free_all_optimizer_costs()
+{
+ linked_optimizer_costs.delete_elements(free_optimizer_costs);
+}
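
The functions added above form a small name-to-cost-set registry: option handling looks an engine
name up case-insensitively and creates an entry whose values stay OPTIMIZER_COST_UNDEF until an
option or the engine fills them in. A minimal standalone sketch of the same get-or-create pattern
(illustrative only; the types and names below are not the server's):

// Minimal standalone sketch of the get-or-create pattern used above for
// per-engine cost sets. Lookups are case-insensitive and a newly created
// entry has all of its cost members marked as "not yet set".
#include <algorithm>
#include <cctype>
#include <map>
#include <string>

static constexpr double COST_UNDEF= -1.0;   // stands in for OPTIMIZER_COST_UNDEF

struct CostSet
{
  double disk_read_cost= COST_UNDEF;
  double key_lookup_cost= COST_UNDEF;
  // ... the remaining cost members are elided in this sketch
};

// Case-insensitive ordering, standing in for system_charset_info->strnncoll()
struct CiLess
{
  bool operator()(std::string a, std::string b) const
  {
    auto lower= [](std::string &s)
    { std::transform(s.begin(), s.end(), s.begin(),
                     [](unsigned char c){ return std::tolower(c); }); };
    lower(a); lower(b);
    return a < b;
  }
};

static std::map<std::string, CostSet, CiLess> cost_sets;

CostSet *get_or_create_cost_set(const std::string &name)
{
  auto it= cost_sets.find(name);
  if (it != cost_sets.end())
    return &it->second;      // an earlier option already created this engine's set
  return &cost_sets[name];   // default-constructed: every value is COST_UNDEF
}
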
diff --git a/sql/keycaches.h b/sql/keycaches.h
index 68c3dd3a2b0..721251b6745 100644
--- a/sql/keycaches.h
+++ b/sql/keycaches.h
@@ -35,7 +35,7 @@ class NAMED_ILIST: public I_List<NAMED_ILINK>
};
/* For key cache */
-extern LEX_CSTRING default_key_cache_base;
+extern LEX_CSTRING default_base;
extern KEY_CACHE zero_key_cache;
extern NAMED_ILIST key_caches;
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index 234c4cdfd7a..d4103f669fa 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -20,6 +20,7 @@
#include "key.h"
#include "sql_statistics.h"
#include "rowid_filter.h"
+#include "optimizer_defaults.h"
/****************************************************************************
* Default MRR implementation (MRR to non-MRR converter)
@@ -302,46 +303,37 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
if (total_rows != HA_POS_ERROR)
{
- double io_cost= avg_io_cost();
- double range_lookup_cost= (io_cost * KEY_LOOKUP_COST *
- optimizer_cache_cost);
+ double key_cost;
set_if_smaller(total_rows, max_rows);
/* The following calculation is the same as in multi_range_read_info(): */
*flags |= HA_MRR_USE_DEFAULT_IMPL;
cost->reset();
- cost->avg_io_cost= cost->idx_avg_io_cost= io_cost;
+ cost->avg_io_cost= cost->idx_avg_io_cost= 0; // Not used!
if (!is_clustering_key(keyno))
{
- cost->idx_io_count= (double) io_blocks;
+ key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
+ cost->idx_cpu_cost= key_cost;
+
if (!(*flags & HA_MRR_INDEX_ONLY))
{
- cost->idx_cpu_cost= (ha_keyread_time(keyno, 1, total_rows) +
- (n_ranges-1) * range_lookup_cost);
- cost->cpu_cost= ha_read_time(keyno, 0, total_rows);
- cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
+ /* ha_rnd_pos_time includes ROW_COPY_COST */
+ cost->cpu_cost= ha_rnd_pos_time(total_rows);
}
else
{
/* Index only read */
- cost->idx_cpu_cost= (ha_keyread_time(keyno, 1, total_rows) +
- (n_ranges-1) * range_lookup_cost);
- cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
+ cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
}
}
else
{
- /*
- Clustered index
- If all index dives are to a few blocks, then limit the
- ranges used by read_time to the number of dives.
- */
+ /* Clustered index */
io_blocks+= unassigned_single_point_ranges;
- uint limited_ranges= (uint) MY_MIN((ulonglong) n_ranges, io_blocks);
- cost->idx_cpu_cost= limited_ranges * range_lookup_cost;
- cost->cpu_cost= ha_read_time(keyno, 0, total_rows);
- cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
+ key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks);
+ cost->idx_cpu_cost= key_cost;
+ cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
}
cost->comp_cost= (rows2double(total_rows) * WHERE_COST +
MULTI_RANGE_READ_SETUP_COST);
@@ -378,7 +370,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
@param keyno Index number
@param n_ranges Estimated number of ranges (i.e. intervals) in the
range sequence.
- @param n_rows Estimated total number of records contained within all
+ @param total_rows Estimated total number of records contained within all
of the ranges
@param bufsz INOUT IN: Size of the buffer available for use
OUT: Size of the buffer that will be actually used, or
@@ -393,7 +385,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
other Error or can't perform the requested scan
*/
-ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
+ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_rows,
uint key_parts, uint *bufsz,
uint *flags, Cost_estimate *cost)
{
@@ -410,38 +402,27 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
/* Produce the same cost as non-MRR code does */
if (!is_clustering_key(keyno))
{
- double range_lookup_cost= (avg_io_cost() * KEY_LOOKUP_COST *
- optimizer_cache_cost);
- /*
- idx_io_count could potentially be increased with the number of
- index leaf blocks we have to read for finding n_rows.
- */
- cost->idx_io_count= n_ranges;
+ double key_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0);
+ cost->idx_cpu_cost= key_cost;
+
if (!(*flags & HA_MRR_INDEX_ONLY))
{
- cost->idx_cpu_cost= (keyread_time(keyno, 1, n_rows) +
- (n_ranges-1) * range_lookup_cost);
- cost->cpu_cost= read_time(keyno, 0, n_rows);
- cost->copy_cost= rows2double(n_rows) * ROW_COPY_COST;
+ /* ha_rnd_pos_time includes ROW_COPY_COST */
+ cost->cpu_cost= ha_rnd_pos_time(total_rows);
}
else
{
- /*
- Same as above, but take into account copying the key to the upper
- level.
- */
- cost->idx_cpu_cost= (keyread_time(keyno, 1, n_rows) +
- (n_ranges-1) * range_lookup_cost);
- cost->copy_cost= rows2double(n_rows) * KEY_COPY_COST;
+ /* Index only read */
+ cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST;
}
}
else
{
/* Clustering key */
- cost->cpu_cost= read_time(keyno, n_ranges, n_rows);
- cost->copy_cost= rows2double(n_rows) * ROW_COPY_COST;
+ cost->cpu_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0);
+ cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST;
}
- cost->comp_cost= rows2double(n_rows) * WHERE_COST;
+ cost->comp_cost= rows2double(total_rows) * WHERE_COST;
return 0;
}
@@ -2043,7 +2024,7 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
cost->mem_cost= (double)rows_in_last_step * elem_size;
/* Total cost of all index accesses */
- index_read_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows);
+ index_read_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0);
cost->add_io(index_read_cost, 1 /* Random seeks */);
return FALSE;
}
@@ -2081,42 +2062,6 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
/**
Get cost of reading nrows table records in a "disk sweep"
- A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
- for an ordered sequence of rowids.
-
- We assume hard disk IO. The read is performed as follows:
-
- 1. The disk head is moved to the needed cylinder
- 2. The controller waits for the plate to rotate
- 3. The data is transferred
-
- Time to do #3 is insignificant compared to #2+#1.
-
- Time to move the disk head is proportional to head travel distance.
-
- Time to wait for the plate to rotate depends on whether the disk head
- was moved or not.
-
- If disk head wasn't moved, the wait time is proportional to distance
- between the previous block and the block we're reading.
-
- If the head was moved, we don't know how much we'll need to wait for the
- plate to rotate. We assume the wait time to be a variate with a mean of
- 0.5 of full rotation time.
-
- Our cost units are "random disk seeks". The cost of random disk seek is
- actually not a constant, it depends one range of cylinders we're going
- to access. We make it constant by introducing a fuzzy concept of "typical
- datafile length" (it's fuzzy as it's hard to tell whether it should
- include index file, temp.tables etc). Then random seek cost is:
-
- 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
-
- We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
-
- If handler::avg_io_cost() < 1.0, then we will trust the handler
- when it comes to the average cost (this is for example true for HEAP).
-
@param table Table to be accessed
@param nrows Number of rows to retrieve
@param interrupted TRUE <=> Assume that the disk sweep will be
@@ -2131,8 +2076,7 @@ void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
cost->reset();
#ifndef OLD_SWEEP_COST
- cost->cpu_cost= table->file->ha_rnd_pos_time(nrows);
- cost->avg_io_cost= table->file->avg_io_cost();
+ cost->cpu_cost= table->file->ha_rnd_pos_call_time(nrows);
#else
if (table->file->pk_is_clustering_key(table->s->primary_key))
{
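
The rewritten MRR costing above splits a non-clustered range read into an index-read part
(ha_keyread_time), either a row-fetch part (ha_rnd_pos_time, which already includes the row copy)
or a key-copy part for index-only scans, and a WHERE-comparison part. A rough standalone sketch of
that decomposition, assuming the per-row constants are already known:

// Rough sketch (not the handler code) of the cost split used by the new
// multi_range_read_info_const() for a non-clustered key.
struct MrrCostSketch
{
  double idx_cpu_cost;   // cost of finding the keys (ha_keyread_time())
  double cpu_cost;       // cost of fetching rows by rowid (includes row copy)
  double copy_cost;      // cost of copying keys for index-only reads
  double comp_cost;      // cost of evaluating the WHERE clause
};

static MrrCostSketch
mrr_cost_sketch(double key_read_cost, double rnd_pos_cost, double rows,
                bool index_only, double key_copy_cost, double where_cost,
                double mrr_setup_cost)
{
  MrrCostSketch c= {0, 0, 0, 0};
  c.idx_cpu_cost= key_read_cost;          // always pay for reading the index
  if (index_only)
    c.copy_cost= rows * key_copy_cost;    // keys are copied, rows are never fetched
  else
    c.cpu_cost= rnd_pos_cost;             // row copy is already included here
  c.comp_cost= rows * where_cost + mrr_setup_cost;
  return c;
}
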
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index c7c21dcf31c..204da6408ed 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -52,6 +52,7 @@
#include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit
#include "sys_vars_shared.h"
#include "ddl_log.h"
+#include "optimizer_defaults.h"
#include <m_ctype.h>
#include <my_dir.h>
@@ -732,7 +733,7 @@ mysql_mutex_t LOCK_prepared_stmt_count;
#ifdef HAVE_OPENSSL
mysql_mutex_t LOCK_des_key_file;
#endif
-mysql_mutex_t LOCK_backup_log;
+mysql_mutex_t LOCK_backup_log, LOCK_optimizer_costs;
mysql_rwlock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave;
mysql_rwlock_t LOCK_ssl_refresh;
mysql_rwlock_t LOCK_all_status_vars;
@@ -902,7 +903,7 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
key_LOCK_crypt, key_LOCK_delayed_create,
key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log,
key_LOCK_gdl, key_LOCK_global_system_variables,
- key_LOCK_manager, key_LOCK_backup_log,
+ key_LOCK_manager, key_LOCK_backup_log, key_LOCK_optimizer_costs,
key_LOCK_prepared_stmt_count,
key_LOCK_rpl_status, key_LOCK_server_started,
key_LOCK_status, key_LOCK_temp_pool,
@@ -965,6 +966,7 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_hash_filo_lock, "hash_filo::lock", 0},
{ &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL},
{ &key_LOCK_backup_log, "LOCK_backup_log", PSI_FLAG_GLOBAL},
+ { &key_LOCK_optimizer_costs, "LOCK_optimizer_costs", PSI_FLAG_GLOBAL},
{ &key_LOCK_temp_pool, "LOCK_temp_pool", PSI_FLAG_GLOBAL},
{ &key_LOCK_thread_id, "LOCK_thread_id", PSI_FLAG_GLOBAL},
{ &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL},
@@ -2005,6 +2007,7 @@ static void clean_up(bool print_message)
mdl_destroy();
dflt_key_cache= 0;
key_caches.delete_elements(free_key_cache);
+ free_all_optimizer_costs();
wt_end();
multi_keycache_free();
sp_cache_end();
@@ -2127,6 +2130,7 @@ static void clean_up_mutexes()
mysql_mutex_destroy(&LOCK_active_mi);
mysql_rwlock_destroy(&LOCK_ssl_refresh);
mysql_mutex_destroy(&LOCK_backup_log);
+ mysql_mutex_destroy(&LOCK_optimizer_costs);
mysql_mutex_destroy(&LOCK_temp_pool);
mysql_rwlock_destroy(&LOCK_sys_init_connect);
mysql_rwlock_destroy(&LOCK_sys_init_slave);
@@ -4516,6 +4520,8 @@ static int init_thread_environment()
mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered,
MY_MUTEX_INIT_SLOW);
mysql_mutex_init(key_LOCK_backup_log, &LOCK_backup_log, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(key_LOCK_optimizer_costs, &LOCK_optimizer_costs,
+ MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_temp_pool, &LOCK_temp_pool, MY_MUTEX_INIT_FAST);
#ifdef HAVE_OPENSSL
@@ -5435,6 +5441,7 @@ static int init_server_components()
unireg_abort(1);
}
#endif
+ copy_tmptable_optimizer_costs();
#ifdef WITH_WSREP
/*
@@ -7826,12 +7833,17 @@ static int mysql_init_variables(void)
strnmov(server_version, MYSQL_SERVER_VERSION, sizeof(server_version)-1);
thread_cache.init();
key_caches.empty();
- if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str,
- default_key_cache_base.length)))
+ if (!(dflt_key_cache= get_or_create_key_cache(default_base.str,
+ default_base.length)))
{
sql_print_error("Cannot allocate the keycache");
return 1;
}
+ if (create_default_optimizer_costs())
+ {
+ sql_print_error("Cannot allocate optimizer_costs");
+ return 1;
+ }
/* set key_cache_hash.default_value = dflt_key_cache */
multi_keycache_init();
@@ -8412,11 +8424,14 @@ mysqld_get_one_option(const struct my_option *opt, const char *argument,
}
-/** Handle arguments for multiple key caches. */
+/**
+ Handle arguments for multiple key caches, replication filters and
+ optimizer_costs.
+ */
C_MODE_START
-static void*
+static void *
mysql_getopt_value(const char *name, uint length,
const struct my_option *option, int *error)
{
@@ -8454,6 +8469,7 @@ mysql_getopt_value(const char *name, uint length,
}
/* We return in all cases above. Let us silence -Wimplicit-fallthrough */
DBUG_ASSERT(0);
+ break;
#ifdef HAVE_REPLICATION
/* fall through */
case OPT_REPLICATE_DO_DB:
@@ -8481,11 +8497,87 @@ mysql_getopt_value(const char *name, uint length,
}
return 0;
}
-#endif
+ break;
+#endif
+ case OPT_COSTS_DISK_READ_COST:
+ case OPT_COSTS_INDEX_BLOCK_COPY_COST:
+ case OPT_COSTS_KEY_CMP_COST:
+ case OPT_COSTS_KEY_COPY_COST:
+ case OPT_COSTS_KEY_LOOKUP_COST:
+ case OPT_COSTS_KEY_NEXT_FIND_COST:
+ case OPT_COSTS_DISK_READ_RATIO:
+ case OPT_COSTS_ROW_COPY_COST:
+ case OPT_COSTS_ROW_LOOKUP_COST:
+ case OPT_COSTS_ROW_NEXT_FIND_COST:
+ case OPT_COSTS_ROWID_CMP_COST:
+ case OPT_COSTS_ROWID_COPY_COST:
+ {
+ OPTIMIZER_COSTS *costs;
+ if (unlikely(!(costs= get_or_create_optimizer_costs(name, length))))
+ {
+ if (error)
+ *error= EXIT_OUT_OF_MEMORY;
+ return 0;
+ }
+ switch (option->id) {
+ case OPT_COSTS_DISK_READ_COST:
+ return &costs->disk_read_cost;
+ case OPT_COSTS_INDEX_BLOCK_COPY_COST:
+ return &costs->index_block_copy_cost;
+ case OPT_COSTS_KEY_CMP_COST:
+ return &costs->key_cmp_cost;
+ case OPT_COSTS_KEY_COPY_COST:
+ return &costs->key_copy_cost;
+ case OPT_COSTS_KEY_LOOKUP_COST:
+ return &costs->key_lookup_cost;
+ case OPT_COSTS_KEY_NEXT_FIND_COST:
+ return &costs->key_next_find_cost;
+ case OPT_COSTS_DISK_READ_RATIO:
+ return &costs->disk_read_ratio;
+ case OPT_COSTS_ROW_COPY_COST:
+ return &costs->row_copy_cost;
+ case OPT_COSTS_ROW_LOOKUP_COST:
+ return &costs->row_lookup_cost;
+ case OPT_COSTS_ROW_NEXT_FIND_COST:
+ return &costs->row_next_find_cost;
+ case OPT_COSTS_ROWID_CMP_COST:
+ return &costs->rowid_cmp_cost;
+ case OPT_COSTS_ROWID_COPY_COST:
+ return &costs->rowid_copy_cost;
+ default:
+ DBUG_ASSERT(0);
+ }
+ }
}
return option->value;
}
+
+static void
+mariadb_getopt_adjust_value(const struct my_option *option, void *value)
+{
+ switch (option->id) {
+ case OPT_COSTS_DISK_READ_COST:
+ case OPT_COSTS_INDEX_BLOCK_COPY_COST:
+ case OPT_COSTS_KEY_CMP_COST:
+ case OPT_COSTS_KEY_COPY_COST:
+ case OPT_COSTS_KEY_LOOKUP_COST:
+ case OPT_COSTS_KEY_NEXT_FIND_COST:
+ case OPT_COSTS_DISK_READ_RATIO:
+ case OPT_COSTS_ROW_COPY_COST:
+ case OPT_COSTS_ROW_LOOKUP_COST:
+ case OPT_COSTS_ROW_NEXT_FIND_COST:
+ case OPT_COSTS_ROWID_CMP_COST:
+ case OPT_COSTS_ROWID_COPY_COST:
+ /* The value given on the command line is in usec. Convert to ms */
+ *(double*) value= *(double*) value/1000.0;
+ break;
+ default:
+ break;
+ }
+}
+
+
static void option_error_reporter(enum loglevel level, const char *format, ...)
{
va_list args;
@@ -8524,6 +8616,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr)
my_getopt_get_addr= mysql_getopt_value;
my_getopt_error_reporter= option_error_reporter;
+ my_getopt_adjust_value= mariadb_getopt_adjust_value;
/* prepare all_options array */
my_init_dynamic_array(PSI_INSTRUMENT_ME, &all_options, sizeof(my_option),
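
The new my_getopt_adjust_value hook divides the parsed value by 1000, so cost options are entered
in microseconds but stored in the optimizer's millisecond cost unit. A toy example of the
conversion (the option name in the comment is hypothetical):

// Toy illustration of the adjust hook: cost options are given in usec on the
// command line and divided by 1000 so that the stored value is in ms.
#include <cstdio>

static void adjust_cost_option(double *value)
{
  *value/= 1000.0;   // 1 internal cost unit == 1 ms
}

int main()
{
  double disk_read_cost= 20.0;   // hypothetical: --optimizer-disk-read-cost=20 (usec)
  adjust_cost_option(&disk_read_cost);
  std::printf("stored value: %g ms\n", disk_read_cost);   // prints 0.02
  return 0;
}
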
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 90306ccb290..22984babf97 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -330,7 +330,7 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list,
key_LOCK_logger, key_LOCK_manager,
key_LOCK_prepared_stmt_count,
key_LOCK_rpl_status, key_LOCK_server_started,
- key_LOCK_status,
+ key_LOCK_status, key_LOCK_optimizer_costs,
key_LOCK_thd_data, key_LOCK_thd_kill,
key_LOCK_user_conn, key_LOG_LOCK_log,
key_master_info_data_lock, key_master_info_run_lock,
@@ -760,7 +760,8 @@ extern mysql_mutex_t
LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator,
LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone,
LOCK_active_mi, LOCK_manager, LOCK_user_conn,
- LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_backup_log;
+ LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_backup_log,
+ LOCK_optimizer_costs;
extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_global_system_variables;
extern mysql_rwlock_t LOCK_all_status_vars;
extern mysql_mutex_t LOCK_start_thread;
@@ -795,6 +796,18 @@ enum options_mysqld
OPT_BINLOG_IGNORE_DB,
OPT_BIN_LOG,
OPT_BOOTSTRAP,
+ OPT_COSTS_DISK_READ_COST,
+ OPT_COSTS_INDEX_BLOCK_COPY_COST,
+ OPT_COSTS_KEY_CMP_COST,
+ OPT_COSTS_KEY_COPY_COST,
+ OPT_COSTS_KEY_LOOKUP_COST,
+ OPT_COSTS_KEY_NEXT_FIND_COST,
+ OPT_COSTS_DISK_READ_RATIO,
+ OPT_COSTS_ROW_COPY_COST,
+ OPT_COSTS_ROW_LOOKUP_COST,
+ OPT_COSTS_ROW_NEXT_FIND_COST,
+ OPT_COSTS_ROWID_CMP_COST,
+ OPT_COSTS_ROWID_COPY_COST,
OPT_EXPIRE_LOGS_DAYS,
OPT_BINLOG_EXPIRE_LOGS_SECONDS,
OPT_CONSOLE,
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index fcad1c7159e..7f943a45fbc 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2740,7 +2740,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
table_info.add_table_name(head);
Json_writer_object trace_range(thd, "range_analysis");
- if (unlikely(thd->trace_started()))
+ if (unlikely(thd->trace_started()) && read_time != DBL_MAX)
{
Json_writer_object table_rec(thd, "table_scan");
table_rec.add("rows", records).add("cost", read_time);
@@ -2867,10 +2867,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
thd->mem_root= &alloc;
/* Calculate cost of full index read for the shortest covering index */
- if (!force_quick_range && !head->covering_keys.is_clear_all())
+ if (!force_quick_range && !head->covering_keys.is_clear_all() &&
+ !head->no_keyread)
{
- int key_for_use= find_shortest_key(head, &head->covering_keys);
double key_read_time;
+ uint key_for_use= find_shortest_key(head, &head->covering_keys);
key_read_time= head->file->ha_key_scan_and_compare_time(key_for_use,
records);
DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, "
@@ -3057,7 +3058,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
param.table->set_opt_range_condition_rows(group_trp->records);
DBUG_PRINT("info", ("table_rows: %llu opt_range_condition_rows: %llu "
"group_trp->records: %ull",
- table_records, param.table->opt_range_condition_rows,
+ table_records,
+ param.table->opt_range_condition_rows,
group_trp->records));
Json_writer_object grp_summary(thd, "best_group_range_summary");
@@ -5079,7 +5081,7 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records,
{
DBUG_ENTER("get_sweep_read_cost");
#ifndef OLD_SWEEP_COST
- double cost= (param->table->file->ha_rnd_pos_time(records) +
+ double cost= (param->table->file->ha_rnd_pos_call_time(records) +
(add_time_for_compare ?
records * param->thd->variables.optimizer_where_cost : 0));
DBUG_PRINT("return", ("cost: %g", cost));
@@ -5095,7 +5097,7 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records,
We are using the primary key to find the rows.
Calculate the cost for this.
*/
- result= table->file->ha_rnd_pos_time(records);
+ result= table->file->ha_rnd_pos_call_time(records);
}
else
{
@@ -5133,7 +5135,7 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records,
*/
result= busy_blocks;
}
- result+= rows2double(n_rows) * ROW_COPY_COST_THD(param->table->thd);
+ result+= rows2double(n_rows) * param->table->file->ROW_COPY_COST;
}
DBUG_PRINT("return",("cost: %g", result));
DBUG_RETURN(result);
@@ -5347,7 +5349,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
is done in QUICK_RANGE_SELECT::row_in_ranges)
*/
double rid_comp_cost= (rows2double(non_cpk_scan_records) *
- ROWID_COMPARE_COST_THD(param->thd));
+ default_optimizer_costs.rowid_cmp_cost);
imerge_cost+= rid_comp_cost;
trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan",
rid_comp_cost);
@@ -5359,7 +5361,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
double sweep_cost= get_sweep_read_cost(param, non_cpk_scan_records, 0);
imerge_cost+= sweep_cost;
trace_best_disjunct.
- add("records", non_cpk_scan_records).
+ add("rows", non_cpk_scan_records).
add("cost_sort_rowid_and_read_disk", sweep_cost).
add("cost", imerge_cost);
}
@@ -5389,7 +5391,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
}
{
- const double dup_removal_cost= Unique::get_use_cost(
+ const double dup_removal_cost= Unique::get_use_cost(thd,
param->imerge_cost_buff, (uint)non_cpk_scan_records,
param->table->file->ref_length,
(size_t)param->thd->variables.sortbuff_size,
@@ -5463,10 +5465,9 @@ skip_to_ror_scan:
double cost;
if ((*cur_child)->is_ror)
{
- /* Ok, we have index_only cost, now get full rows scan cost */
+ /* Ok, we have index_only cost, now get the full row lookup cost */
cost= param->table->file->
- ha_read_and_compare_time(param->real_keynr[(*cur_child)->key_idx], 1,
- (*cur_child)->records);
+ ha_rnd_pos_call_and_compare_time((*cur_child)->records);
}
else
cost= read_time;
@@ -5935,7 +5936,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
continue;
}
- cost= table->opt_range[(*index_scan)->keynr].index_only_fetch_cost(thd);
+ cost= table->opt_range[(*index_scan)->keynr].index_only_fetch_cost(table);
idx_scan.add("cost", cost);
@@ -6041,7 +6042,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
selected_idx.add("index", key_info->name);
print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
selected_idx.
- add("records", (*scan_ptr)->records).
+ add("rows", (*scan_ptr)->records).
add("filtered_records", (*scan_ptr)->filtered_out);
}
}
@@ -6058,7 +6059,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
selected_idx.add("index", key_info->name);
print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts);
selected_idx.
- add("records", (*scan_ptr)->records).
+ add("rows", (*scan_ptr)->records).
add("filtered_records", (*scan_ptr)->filtered_out);
}
}
@@ -6324,7 +6325,8 @@ double get_cpk_filter_cost(ha_rows filtered_records,
*/
static
-bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
+bool check_index_intersect_extension(THD *thd,
+ PARTIAL_INDEX_INTERSECT_INFO *curr,
INDEX_SCAN_INFO *ext_index_scan,
PARTIAL_INDEX_INTERSECT_INFO *next)
{
@@ -6371,7 +6373,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
size_t max_memory_size= common_info->max_memory_size;
records_sent_to_unique+= ext_index_scan_records;
- cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique,
+ cost= Unique::get_use_cost(thd, buff_elems, (size_t) records_sent_to_unique,
key_size,
max_memory_size, compare_factor, TRUE,
&next->in_memory);
@@ -6382,7 +6384,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
double cost2;
bool in_memory2;
ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk;
- cost2= Unique::get_use_cost(buff_elems, (size_t) records2, key_size,
+ cost2= Unique::get_use_cost(thd, buff_elems, (size_t) records2, key_size,
max_memory_size, compare_factor, TRUE,
&in_memory2);
cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan,
@@ -6442,7 +6444,8 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr,
*/
static
-void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr)
+void find_index_intersect_best_extension(THD *thd,
+ PARTIAL_INDEX_INTERSECT_INFO *curr)
{
PARTIAL_INDEX_INTERSECT_INFO next;
COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info;
@@ -6475,8 +6478,9 @@ void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr)
{
*rem_first_index_scan_ptr= *index_scan_ptr;
*index_scan_ptr= rem_first_index_scan;
- if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next))
- find_index_intersect_best_extension(&next);
+ if (check_index_intersect_extension(thd, curr, *rem_first_index_scan_ptr,
+ &next))
+ find_index_intersect_best_extension(thd, &next);
*index_scan_ptr= *rem_first_index_scan_ptr;
*rem_first_index_scan_ptr= rem_first_index_scan;
}
@@ -6528,7 +6532,7 @@ TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
read_time))
DBUG_RETURN(NULL);
- find_index_intersect_best_extension(&init);
+ find_index_intersect_best_extension(thd, &init);
if (common.best_length <= 1 && !common.best_uses_cpk)
DBUG_RETURN(NULL);
@@ -6697,7 +6701,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
*/
ror_scan->index_read_cost=
param->table->file->ha_keyread_and_copy_time(ror_scan->keynr, 1,
- ror_scan->records);
+ ror_scan->records, 0);
DBUG_RETURN(ror_scan);
}
@@ -13885,10 +13889,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
cause= "not single_table";
else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
cause= "rollup";
- else if (table->s->keys == 0) /* There are no indexes to use. */
+ else if (table->s->keys == 0) // There are no indexes to use.
cause= "no index";
else if (join->conds && join->conds->used_tables()
- & OUTER_REF_TABLE_BIT) /* Cannot execute with correlated conditions. */
+ & OUTER_REF_TABLE_BIT) // Cannot execute with correlated conditions.
cause= "correlated conditions";
if (cause)
@@ -14093,7 +14097,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
does not qualify as covering in our case. If this is the case, below
we check that all query fields are indeed covered by 'cur_index'.
*/
- if (cur_index_info->user_defined_key_parts == table->actual_n_key_parts(cur_index_info)
+ if (cur_index_info->user_defined_key_parts ==
+ table->actual_n_key_parts(cur_index_info)
&& pk < MAX_KEY && cur_index != pk &&
(table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
{
@@ -14136,7 +14141,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
first Item? If so, then why? What is the array for?
*/
/* Above we already checked that all group items are fields. */
- DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM);
+ DBUG_ASSERT((*tmp_group->item)->real_item()->type() ==
+ Item::FIELD_ITEM);
Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item();
if (group_field->field->eq(cur_part->field))
{
@@ -15000,24 +15006,28 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
bool have_min, bool have_max,
double *read_cost, ha_rows *records)
{
+ uint keys_per_block, key_length;
ha_rows table_records;
ha_rows num_groups;
ha_rows num_blocks;
- uint keys_per_block;
ha_rows keys_per_group;
ha_rows keys_per_subgroup; /* Average number of keys in sub-groups */
/* formed by a key infix. */
double p_overlap; /* Probability that a sub-group overlaps two blocks. */
double quick_prefix_selectivity;
double io_cost;
+ handler *file= table->file;
DBUG_ENTER("cost_group_min_max");
+ /* Same code as in handler::key_read_time() */
table_records= table->stat_records();
- /* Assume block is 75 % full */
- keys_per_block= (uint) (table->file->stats.block_size * 3 / 4 /
- (index_info->key_length + table->file->ref_length)
- + 1);
- num_blocks= (ha_rows)(table_records / keys_per_block) + 1;
+ key_length= (index_info->key_length + file->ref_length);
+ num_blocks= (table_records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV *
+ INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1;
+ keys_per_block= (file->stats.block_size /
+ (key_length * INDEX_BLOCK_FILL_FACTOR_MUL /
+ INDEX_BLOCK_FILL_FACTOR_DIV) +
+ 1);
/* Compute the number of keys in a group. */
if (!group_key_parts)
@@ -15035,7 +15045,10 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
keys_per_group= (table_records / 10) + 1;
}
}
- num_groups= (table_records / keys_per_group) + 1;
+ if (keys_per_group > 1)
+ num_groups= (table_records / keys_per_group) + 1;
+ else
+ num_groups= table_records;
/* Apply the selectivity of the quick select for group prefixes. */
if (range_tree && (quick_prefix_records != HA_POS_ERROR))
@@ -15059,8 +15072,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
/* There cannot be more groups than matched records */
set_if_smaller(num_groups, quick_prefix_records);
}
- /* Ensure we don't have more groups than rows in table */
- set_if_smaller(num_groups, table_records);
+ DBUG_ASSERT(num_groups <= table_records);
if (used_key_parts > group_key_parts)
{
@@ -15081,39 +15093,22 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks);
}
else
- io_cost= (keys_per_group > keys_per_block) ?
- (have_min && have_max) ? (double) (num_groups + 1) :
- (double) num_groups :
- (double) num_blocks;
+ io_cost= ((keys_per_group > keys_per_block) ?
+ (have_min && have_max) ? (double) (num_groups + 1) :
+ (double) num_groups :
+ (double) num_blocks);
/*
CPU cost must be comparable to that of an index scan as computed
in SQL_SELECT::test_quick_select(). When the groups are small,
e.g. for a unique index, using index scan will be cheaper since it
reads the next record without having to re-position to it on every
- group. To make the CPU cost reflect this, we estimate the CPU cost
- as the sum of:
- 1. Cost for evaluating the condition for each num_group
- KEY_COMPARE_COST (similarly as for index scan).
- 2. Cost for navigating the index structure (assuming a b-tree).
- Note: We only add the cost for one index comparision per block. For a
- b-tree the number of comparisons will be larger. However the cost
- is low as all of the upper level b-tree blocks should be in
- memory.
- TODO: This cost should be provided by the storage engine.
- 3. Cost for comparing the row with the where clause
+ group.
*/
- const THD *thd= table->in_use;
- const double tree_traversal_cost=
- ceil(log(static_cast<double>(table_records))/
- log(static_cast<double>(keys_per_block))) *
- thd->variables.optimizer_key_cmp_cost;
-
- const double cpu_cost= (num_groups *
- (tree_traversal_cost +
- thd->variables.optimizer_where_cost));
-
- *read_cost= io_cost + cpu_cost;
+ uint keyno= (uint) (index_info - table->key_info);
+ *read_cost= file->ha_keyread_and_compare_time(keyno, (ulong) num_groups,
+ num_groups,
+ io_cost);
*records= num_groups;
DBUG_PRINT("info",
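
The new arithmetic in cost_group_min_max() assumes index blocks are 75% full, expressed with the
INDEX_BLOCK_FILL_FACTOR_MUL/DIV pair, so each key entry is charged 4/3 of its length. A worked
example with made-up numbers:

// Worked example of the 75% fill-factor arithmetic above (illustrative values).
#include <cstdio>

int main()
{
  const unsigned long long table_records= 1000000;
  const unsigned key_length= 20 + 6;        // hypothetical: 20-byte key + 6-byte ref
  const unsigned block_size= 8192;
  const unsigned FILL_DIV= 3, FILL_MUL= 4;  // blocks 3/4 full => entries use 4/3 of their size

  unsigned long long num_blocks=
    (table_records * key_length / FILL_DIV * FILL_MUL) / block_size + 1;
  unsigned keys_per_block=
    block_size / (key_length * FILL_MUL / FILL_DIV) + 1;

  std::printf("blocks: %llu  keys/block: %u\n", num_blocks, keys_per_block);
  return 0;
}
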
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 8cb82693d96..8848c1820df 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -188,6 +188,7 @@
#include "mariadb.h"
#include "sql_select.h"
#include "opt_trace.h"
+#include "optimizer_defaults.h"
/* Info on a splitting field */
struct SplM_field_info
@@ -665,6 +666,8 @@ add_ext_keyuses_for_splitting_field(Dynamic_array<KEYUSE_EXT> *ext_keyuses,
/*
@brief
Cost of the post join operation used in specification of splittable table
+ This does not include the cost of creating the temporary table as this
+ operation can be executed many times for the same temporary table.
*/
static
@@ -673,13 +676,18 @@ double spl_postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len)
double cost;
TMPTABLE_COSTS tmp_cost= get_tmp_table_costs(thd, join_record_count,
rec_len, 0, 1);
- // cost to fill tmp table
- cost= tmp_cost.create + tmp_cost.write * join_record_count;
- // cost to perform post join operation used here
+ /* cost to fill tmp table */
+ cost= tmp_cost.write * join_record_count;
+ /* cost to perform post join operation used here */
cost+= tmp_cost.lookup * join_record_count;
- cost+= (join_record_count == 0 ? 0 :
- join_record_count * log2 (join_record_count)) *
- SORT_INDEX_CMP_COST; // cost to perform sorting
+ /* cost to perform sorting */
+ /* QQQ
+ We should use cost_of_filesort() for computing the sort cost.
+ Do we always perform sorting? If not, this should be done conditionally
+ */
+ cost+= ((join_record_count == 0 ? 0 :
+ join_record_count * log2 (join_record_count)) *
+ SORT_INDEX_CMP_COST);
return cost;
}
@@ -873,7 +881,7 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
splitting the function set it as the true plan of materialization
of the table T.
The function caches the found plans for materialization of table T
- together if the info what key was used for splitting. Next time when
+ together with the info about which key was used for splitting. Next time when
the optimizer prefers to use the same key the plan is taken from
the cache of plans
@@ -1004,12 +1012,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
spl_opt_info->unsplit_card : 1);
uint rec_len= table->s->rec_buff_length;
-
double split_card= spl_opt_info->unsplit_card * spl_plan->split_sel;
- double oper_cost= split_card *
- spl_postjoin_oper_cost(thd, split_card, rec_len);
- spl_plan->cost= join->best_positions[join->table_count-1].read_time +
- + oper_cost;
+ double oper_cost= (split_card *
+ spl_postjoin_oper_cost(thd, split_card, rec_len));
+ spl_plan->cost= (join->best_positions[join->table_count-1].read_time +
+ oper_cost);
if (unlikely(thd->trace_started()))
{
@@ -1030,7 +1037,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
if (spl_plan)
{
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+ if (record_count * spl_plan->cost + COST_EPS < spl_opt_info->unsplit_cost)
{
/*
The best plan that employs splitting is cheaper than
@@ -1054,7 +1061,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
trace.
add("startup_cost", startup_cost).
add("splitting_cost", spl_plan->cost).
- add("records", records);
+ add("rows", records);
}
}
else
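
spl_postjoin_oper_cost() above now charges one write and one lookup per row plus an n*log2(n)
sorting term. A worked sketch of that shape, with made-up per-row constants (not the server
defaults):

// Worked sketch of the post-join cost shape: fill the tmp table, probe it,
// then sort it. All per-row constants here are illustrative.
#include <cmath>
#include <cstdio>

int main()
{
  const double rows= 1000.0;
  const double write_cost= 0.001, lookup_cost= 0.0005;   // hypothetical per-row costs
  const double sort_cmp_cost= 0.00001;                   // hypothetical compare cost

  double cost= write_cost * rows +
               lookup_cost * rows +
               (rows == 0 ? 0 : rows * std::log2(rows)) * sort_cmp_cost;
  std::printf("post-join cost: %g\n", cost);
  return 0;
}
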
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index a944e24ac7a..e996e1738b9 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -35,6 +35,7 @@
#include "sql_test.h"
#include <my_bit.h>
#include "opt_trace.h"
+#include "optimizer_defaults.h"
/*
This file contains optimizations for semi-join subqueries.
@@ -1456,8 +1457,8 @@ void get_delayed_table_estimates(TABLE *table,
hash_sj_engine->tmp_table->s->reclength);
/* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */
- *scan_time= ((data_size/table->file->stats.block_size+2) *
- table->file->avg_io_cost()) + *out_rows * file->ROW_COPY_COST;
+ *scan_time= ((data_size/IO_SIZE * table->file->avg_io_cost()) +
+ *out_rows * file->ROW_COPY_COST);
}
@@ -2580,11 +2581,9 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
We don't need to check the where clause for each row, so no
WHERE_COST is needed.
*/
- scan_cost= (TABLE_SCAN_SETUP_COST +
- (cost.block_size == 0 ? 0 :
- ((rowlen * (double) sjm->rows) / cost.block_size +
- TABLE_SCAN_SETUP_COST)));
+ scan_cost= (rowlen * (double) sjm->rows) / cost.block_size;
total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost +
+ TABLE_SCAN_SETUP_COST_THD(thd) +
row_copy_cost * sjm->rows);
sjm->scan_cost.convert_from_cost(total_cost);
@@ -2684,8 +2683,6 @@ get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used,
bool add_copy_cost)
{
TMPTABLE_COSTS cost;
- double row_copy_cost= add_copy_cost ? ROW_COPY_COST_THD(thd) : 0;
-
/* From heap_prepare_hp_create_info(), assuming one hash key used */
row_size+= sizeof(char*)*2;
row_size= MY_ALIGN(MY_MAX(row_size, sizeof(char*)) + 1, sizeof(char*));
@@ -2693,24 +2690,31 @@ get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used,
if (row_count > thd->variables.max_heap_table_size / (double) row_size ||
blobs_used)
{
+ double row_copy_cost= (add_copy_cost ?
+ tmp_table_optimizer_costs.row_copy_cost :
+ 0);
/* Disk based table */
- cost.lookup= ((DISK_TEMPTABLE_LOOKUP_COST *
- thd->optimizer_cache_hit_ratio)) + row_copy_cost;
- cost.write= cost.lookup + row_copy_cost;
+ cost.lookup= ((tmp_table_optimizer_costs.key_lookup_cost *
+ tmp_table_optimizer_costs.disk_read_ratio) +
+ row_copy_cost);
+ cost.write= cost.lookup;
cost.create= DISK_TEMPTABLE_CREATE_COST;
cost.block_size= DISK_TEMPTABLE_BLOCK_SIZE;
- cost.avg_io_cost= 1.0;
- cost.cache_hit_ratio= thd->optimizer_cache_hit_ratio;
+ cost.avg_io_cost= tmp_table_optimizer_costs.disk_read_cost;
+ cost.cache_hit_ratio= tmp_table_optimizer_costs.disk_read_ratio;
}
else
{
/* Values are as they are in heap.h */
+ double row_copy_cost= (add_copy_cost ?
+ heap_optimizer_costs.row_copy_cost :
+ 0);
cost.lookup= HEAP_TEMPTABLE_LOOKUP_COST + row_copy_cost;
- cost.write= cost.lookup + row_copy_cost;
+ cost.write= cost.lookup;
cost.create= HEAP_TEMPTABLE_CREATE_COST;
- cost.block_size= 0;
- cost.avg_io_cost= HEAP_TEMPTABLE_LOOKUP_COST;
- cost.cache_hit_ratio= 1.0;
+ cost.block_size= 1;
+ cost.avg_io_cost= 0;
+ cost.cache_hit_ratio= 0;
}
return cost;
}
@@ -3181,7 +3185,7 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
if (unlikely(trace.trace_started()))
{
trace.
- add("records", *record_count).
+ add("rows", *record_count).
add("cost", *read_time);
}
return TRUE;
@@ -3235,7 +3239,7 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
best_access_path(join, join->positions[i].table, rem_tables,
join->positions, i,
disable_jbuf, prefix_rec_count, &curpos, &dummy);
- prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_read);
+ prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_out);
prefix_cost= COST_ADD(prefix_cost, curpos.read_time);
//TODO: take into account join condition selectivity here
}
@@ -3262,7 +3266,7 @@ bool Sj_materialization_picker::check_qep(JOIN *join,
if (unlikely(trace.trace_started()))
{
trace.
- add("records", *record_count).
+ add("rows", *record_count).
add("cost", *read_time);
}
return TRUE;
@@ -3363,7 +3367,7 @@ bool LooseScan_picker::check_qep(JOIN *join,
if (unlikely(trace.trace_started()))
{
trace.
- add("records", *record_count).
+ add("rows", *record_count).
add("cost", *read_time);
}
return TRUE;
@@ -3461,7 +3465,7 @@ bool Firstmatch_picker::check_qep(JOIN *join,
- remove fanout added by the last table
*/
if (*record_count)
- *record_count /= join->positions[idx].records_read;
+ *record_count /= join->positions[idx].records_out;
}
else
{
@@ -3482,7 +3486,7 @@ bool Firstmatch_picker::check_qep(JOIN *join,
if (unlikely(trace.trace_started()))
{
trace.
- add("records", *record_count).
+ add("rows", *record_count).
add("cost", *read_time);
}
return TRUE;
@@ -3609,21 +3613,22 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
*/
uint first_tab= first_dupsweedout_table;
double dups_cost;
- double prefix_rec_count;
+ double first_weedout_table_rec_count;
double sj_inner_fanout= 1.0;
double sj_outer_fanout= 1.0;
uint temptable_rec_size;
if (first_tab == join->const_tables)
{
- prefix_rec_count= 1.0;
+ first_weedout_table_rec_count= 1.0;
temptable_rec_size= 0;
dups_cost= 0.0;
}
else
{
dups_cost= join->positions[first_tab - 1].prefix_cost;
- prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+ first_weedout_table_rec_count=
+ join->positions[first_tab - 1].prefix_record_count;
temptable_rec_size= 8; /* This is not true but we'll make it so */
}
@@ -3659,17 +3664,14 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
sj_outer_fanout,
temptable_rec_size,
0, 0);
- double write_cost=
- COST_ADD(one_cost.create,
- COST_MULT(join->positions[first_tab].prefix_record_count,
- sj_outer_fanout * one_cost.write));
- double full_lookup_cost=
- COST_MULT(join->positions[first_tab].prefix_record_count,
- COST_MULT(sj_outer_fanout,
- sj_inner_fanout * one_cost.lookup));
- *read_time= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost));
+ double prefix_record_count= join->positions[first_tab].prefix_record_count;
+ double write_cost= (one_cost.create +
+ prefix_record_count * sj_outer_fanout * one_cost.write);
+ double full_lookup_cost= (prefix_record_count * sj_outer_fanout *
+ sj_inner_fanout * one_cost.lookup);
+ *read_time= dups_cost + write_cost + full_lookup_cost;
- *record_count= prefix_rec_count * sj_outer_fanout;
+ *record_count= first_weedout_table_rec_count * sj_outer_fanout;
*handled_fanout= dups_removed_fanout;
*strategy= SJ_OPT_DUPS_WEEDOUT;
if (unlikely(join->thd->trace_started()))
@@ -3677,7 +3679,10 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
Json_writer_object trace(join->thd);
trace.
add("strategy", "DuplicateWeedout").
- add("records", *record_count).
+ add("prefix_row_count", prefix_record_count).
+ add("tmp_table_rows", sj_outer_fanout).
+ add("sj_inner_fanout", sj_inner_fanout).
+ add("rows", *record_count).
add("dups_cost", dups_cost).
add("write_cost", write_cost).
add("full_lookup_cost", full_lookup_cost).
@@ -3881,7 +3886,7 @@ static void recalculate_prefix_record_count(JOIN *join, uint start, uint end)
prefix_count= 1.0;
else
prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count,
- join->best_positions[j-1].records_read);
+ join->best_positions[j-1].records_out);
join->best_positions[j].prefix_record_count= prefix_count;
}
@@ -4033,7 +4038,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
join->best_positions, i,
FALSE, prefix_rec_count,
join->best_positions + i, &dummy);
- prefix_rec_count *= join->best_positions[i].records_read;
+ prefix_rec_count *= join->best_positions[i].records_out;
rem_tables &= ~join->best_positions[i].table->table->map;
}
}
@@ -4075,7 +4080,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
TRUE /* no jbuf */,
record_count, join->best_positions + idx, &dummy);
}
- record_count *= join->best_positions[idx].records_read;
+ record_count *= join->best_positions[idx].records_out;
rem_tables &= ~join->best_positions[idx].table->table->map;
}
}
@@ -4133,7 +4138,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
}
}
rem_tables &= ~join->best_positions[idx].table->table->map;
- record_count *= join->best_positions[idx].records_read;
+ record_count *= join->best_positions[idx].records_out;
}
first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN;
first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables);
@@ -5350,7 +5355,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
Got a table that's not within any semi-join nest. This is a case
like this:
- SELECT * FROM ot1, nt1 WHERE ot1.col IN (SELECT expr FROM it1, it2)
+ SELECT * FROM ot1, nt1 WHERE
+ ot1.col IN (SELECT expr FROM it1, it2)
with a join order of
@@ -6762,7 +6768,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables)
Json_writer_object trace_wrapper(thd);
Json_writer_object trace_subquery(thd, "subquery_plan");
trace_subquery.
- add("records", inner_record_count_1).
+ add("rows", inner_record_count_1).
add("materialization_cost", materialize_strategy_cost).
add("in_exist_cost", in_exists_strategy_cost).
add("choosen", strategy);
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
index 4ba90f6c60b..b0053d3db14 100644
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@@ -226,15 +226,17 @@ public:
if (!(found_part & 1 ) && /* no usable ref access for 1st key part */
s->table->covering_keys.is_set(key))
{
+ double records, read_time;
part1_conds_met= TRUE;
DBUG_PRINT("info", ("Can use full index scan for LooseScan"));
/* Calculate the cost of complete loose index scan. */
- double records= rows2double(s->table->file->stats.records);
+ records= rows2double(s->table->file->stats.records);
/* The cost is entire index scan cost (divided by 2) */
- double read_time= s->table->file->ha_keyread_and_copy_time(key, 1,
- (ha_rows) records);
+ read_time= s->table->file->ha_keyread_and_copy_time(key, 1,
+ (ha_rows) records,
+ 0);
/*
Now find out how many different keys we will get (for now we
diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc
index 374fc41aba8..d7b3d83bb18 100644
--- a/sql/opt_trace.cc
+++ b/sql/opt_trace.cc
@@ -696,8 +696,8 @@ void print_best_access_for_table(THD *thd, POSITION *pos)
Json_writer_object obj(thd, "chosen_access_method");
obj.
add("type", pos->type == JT_ALL ? "scan" : join_type_str[pos->type]).
- add("records_read", pos->records_read).
- add("records_out", pos->records_out).
+ add("rows_read", pos->records_read).
+ add("rows_out", pos->records_out).
add("cost", pos->read_time).
add("uses_join_buffering", pos->use_join_buffer);
if (pos->range_rowid_filter_info)
diff --git a/sql/optimizer_costs.h b/sql/optimizer_costs.h
index de933969131..698cdbfe41e 100644
--- a/sql/optimizer_costs.h
+++ b/sql/optimizer_costs.h
@@ -18,41 +18,79 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
-/* This file includes costs variables used by the optimizer */
-
/*
- The table/index cache hit ratio in %. 0 means that a searched for key or row
- will never be in the cache while 100 means it always in the cache.
-
- According to folklore, one need at least 80 % hit rate in the cache for
- MariaDB to run very well. We set CACHE_HIT_RATIO to a bit smaller
- as there is still a cost involved in finding the row in the B tree, hash
- or other seek structure.
+ This file defines cost structures and cost functions used by the optimizer
+*/
- Increasing CACHE_HIT_RATIO will make MariaDB prefer key lookups over
- table scans as the impact of ROW_COPY_COST and INDEX_COPY cost will
- have a larger impact when more rows are exmined..
- Note that avg_io_cost() is multipled with this constant!
+/*
+ OPTIMIZER_COSTS stores the cost variables for each engine. The cost sets are
+ stored in linked_optimizer_costs (pointed to by the handlerton) and in TABLE_SHARE.
*/
-#define DEFAULT_CACHE_HIT_RATIO 50
-/* Convert ratio to cost */
-
-static inline double cache_hit_ratio(uint ratio)
+#define OPTIMIZER_COST_UNDEF -1.0
+struct OPTIMIZER_COSTS
{
- return (((double) (100 - ratio)) / 100.0);
-}
+ double disk_read_cost;
+ double index_block_copy_cost;
+ double key_cmp_cost;
+ double key_copy_cost;
+ double key_lookup_cost;
+ double key_next_find_cost;
+ double disk_read_ratio;
+ double row_copy_cost;
+ double row_lookup_cost;
+ double row_next_find_cost;
+ double rowid_cmp_cost;
+ double rowid_copy_cost;
+ double initialized; // Set if default or connected with handlerton
+};
+
+/* Default optimizer costs */
+extern OPTIMIZER_COSTS default_optimizer_costs;
+/*
+ These are used to avoid taking a mutex while creating tmp tables.
+ They are created once after the server has started, so they are
+ not dynamic.
+*/
+extern OPTIMIZER_COSTS heap_optimizer_costs, tmp_table_optimizer_costs;
/*
- Base cost for finding keys and rows from the engine is 1.0
- All other costs should be proportional to these
+ Interface to the engine cost variables. See optimizer_defaults.h for
+ the default values.
*/
-/* Cost for finding the first key in a key scan */
-#define KEY_LOOKUP_COST ((double) 1.0)
-/* Cost of finding a key from a row_ID (not used for clustered keys) */
-#define ROW_LOOKUP_COST ((double) 1.0)
+#define DISK_READ_RATIO costs->disk_read_ratio
+#define KEY_LOOKUP_COST costs->key_lookup_cost
+#define ROW_LOOKUP_COST costs->row_lookup_cost
+#define INDEX_BLOCK_COPY_COST costs->index_block_copy_cost
+#define KEY_COPY_COST costs->key_copy_cost
+#define ROW_COPY_COST costs->row_copy_cost
+#define ROW_COPY_COST_THD(THD) default_optimizer_costs.row_copy_cost
+#define KEY_NEXT_FIND_COST costs->key_next_find_cost
+#define ROW_NEXT_FIND_COST costs->row_next_find_cost
+#define KEY_COMPARE_COST costs->key_cmp_cost
+#define SORT_INDEX_CMP_COST default_optimizer_costs.key_cmp_cost
+#define DISK_READ_COST costs->disk_read_cost
+#define DISK_READ_COST_THD(thd) default_optimizer_costs.disk_read_cost
+
+/* Cost of comparing two rowids. This is set relative to KEY_COMPARE_COST */
+#define ROWID_COMPARE_COST costs->rowid_cmp_cost
+#define ROWID_COMPARE_COST_THD(THD) default_optimizer_costs.rowid_cmp_cost
+
+/* Cost of copying a rowid. This is set relative to KEY_COPY_COST */
+#define ROWID_COPY_COST costs->rowid_copy_cost
+
+/* Engine-independent costs. Stored in the THD so that the user can change them */
+#define WHERE_COST optimizer_where_cost
+#define WHERE_COST_THD(THD) ((THD)->variables.optimizer_where_cost)
+#define TABLE_SCAN_SETUP_COST optimizer_scan_setup_cost
+#define TABLE_SCAN_SETUP_COST_THD(THD) (THD)->variables.optimizer_scan_setup_cost
+#define INDEX_SCAN_SETUP_COST optimizer_scan_setup_cost/2
+
+/* The default fill factor of a (b-tree) index block is assumed to be 0.75 */
+#define INDEX_BLOCK_FILL_FACTOR_DIV 3
+#define INDEX_BLOCK_FILL_FACTOR_MUL 4
/*
These constants impact the cost of QSORT and priority queue sorting,
@@ -68,94 +106,13 @@ static inline double cache_hit_ratio(uint ratio)
*/
#define QSORT_SORT_SLOWNESS_CORRECTION_FACTOR (0.1)
#define PQ_SORT_SLOWNESS_CORRECTION_FACTOR (0.1)
-/*
- Cost of finding and copying keys from the storage engine index cache to
- an internal cache as part of an index scan.
- Used in handler::keyread_time()
-*/
-#define DEFAULT_INDEX_BLOCK_COPY_COST ((double) 1 / 5.0)
-#define INDEX_BLOCK_COPY_COST(THD) ((THD)->variables.optimizer_index_block_copy_cost)
-
-/*
- Cost of finding the next row during table scan and copying it to
- 'table->record'.
- If this is too small, then table scans will be prefered over 'ref'
- as with table scans there are no key read (KEY_LOOKUP_COST), fewer
- disk reads but more record copying and row comparisions. If it's
- too big then MariaDB will used key lookup even when table scan is
- better.
-*/
-#define DEFAULT_ROW_COPY_COST ((double) 1.0 / 20.0)
-#define ROW_COPY_COST optimizer_row_copy_cost
-#define ROW_COPY_COST_THD(THD) ((THD)->variables.optimizer_row_copy_cost)
/*
Creating a record from the join cache is faster than getting a row from
the engine. JOIN_CACHE_ROW_COPY_COST_FACTOR is the factor used to
take this into account. This is multiplied with ROW_COPY_COST.
*/
-#define JOIN_CACHE_ROW_COPY_COST_FACTOR 0.75
-
-/*
- Cost of finding the next key during index scan and copying it to
- 'table->record'
-
- If this is too small, then index scans will be prefered over 'ref'
- as with table scans there are no key read (KEY_LOOKUP_COST) and
- fewer disk reads.
-*/
-#define DEFAULT_KEY_COPY_COST ((double) 1.0 / 40.0)
-#define KEY_COPY_COST optimizer_key_copy_cost
-#define KEY_COPY_COST_THD(THD) ((THD)->variables.optimizer_key_copy_cost)
-
-/*
- Cost of finding the next index entry and checking it against filter
- This cost is very low as it's done inside the storage engine.
- Should be smaller than KEY_COPY_COST.
- */
-#define DEFAULT_KEY_NEXT_FIND_COST ((double) 1.0 / 80.0)
-#define KEY_NEXT_FIND_COST optimizer_next_find_cost
-
-/**
- The following is used to decide if MariaDB should use table scanning
- instead of reading with keys. The number says how many evaluation of the
- WHERE clause is comparable to reading one extra row from a table.
-*/
-#define DEFAULT_WHERE_COST (1 / 5.0)
-#define WHERE_COST optimizer_where_cost
-#define WHERE_COST_THD(THD) ((THD)->variables.optimizer_where_cost)
-
-#define DEFAULT_KEY_COMPARE_COST (1 / 20.0)
-#define KEY_COMPARE_COST optimizer_key_cmp_cost
-
-/*
- Cost of comparing two rowids. This is set relative to KEY_COMPARE_COST
- This is usally just a memcmp!
-*/
-#define ROWID_COMPARE_COST KEY_COMPARE_COST/10.0
-#define ROWID_COMPARE_COST_THD(THD) ((THD)->variables.KEY_COMPARE_COST / 10.0)
-
-/*
- Setup cost for different operations
-*/
-
-/* Extra cost for doing a range scan. Used to prefer 'ref' over range */
-#define MULTI_RANGE_READ_SETUP_COST (double) (1.0 / 50.0)
-
-/*
- These costs are mainly to handle small tables, like the one we have in the
- mtr test suite
-*/
-/* Extra cost for full table scan. Used to prefer range over table scans */
-#define TABLE_SCAN_SETUP_COST 1.0
-/* Extra cost for full index scan. Used to prefer range over index scans */
-#define INDEX_SCAN_SETUP_COST 1.0
-
-/*
- The lower bound of accepted rows when using filter.
- This is used to ensure that filters are not too agressive.
-*/
-#define MIN_ROWS_AFTER_FILTERING 1.0
+#define JOIN_CACHE_ROW_COPY_COST_FACTOR(thd) 1.0
/*
cost1 is better that cost2 only if cost1 + COST_EPS < cost2
@@ -163,33 +120,8 @@ static inline double cache_hit_ratio(uint ratio)
when there are identical plans. Without COST_EPS some plans in the
test suite would vary depending on floating point calculations done
in different paths.
- */
-#define COST_EPS 0.0001
-
-/*
- For sequential disk seeks the cost formula is:
- DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip
-
- The cost of average seek
- DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
-*/
-#define DISK_SEEK_BASE_COST ((double)0.9)
-
-#define BLOCKS_IN_AVG_SEEK 128
-
-#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK)
-
-/*
- Subquery materialization-related constants
*/
-/* This should match ha_heap::read_time() */
-#define HEAP_TEMPTABLE_LOOKUP_COST 0.05
-#define HEAP_TEMPTABLE_CREATE_COST 1.0
-#define DISK_TEMPTABLE_LOOKUP_COST 1.0
-#define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 /* 2 tmp tables */
-#define DISK_TEMPTABLE_BLOCK_SIZE 8192
-
-#define SORT_INDEX_CMP_COST 0.02
+#define COST_EPS 0.0000001
#define COST_MAX (DBL_MAX * (1.0 - DBL_EPSILON))
@@ -207,4 +139,22 @@ static inline double COST_MULT(double c, double f)
return (COST_MAX / (f) > (c) ? (c) * (f) : COST_MAX);
}
+OPTIMIZER_COSTS *get_optimizer_costs(const LEX_CSTRING *cache_name);
+OPTIMIZER_COSTS *create_optimizer_costs(const char *name, size_t length);
+OPTIMIZER_COSTS *get_or_create_optimizer_costs(const char *name,
+ size_t length);
+bool create_default_optimizer_costs();
+void copy_tmptable_optimizer_costs();
+void free_all_optimizer_costs();
+struct TABLE;
+
+extern "C"
+{
+ typedef int (*process_optimizer_costs_t) (const LEX_CSTRING *,
+ const OPTIMIZER_COSTS *,
+ TABLE *);
+ bool process_optimizer_costs(process_optimizer_costs_t func, TABLE *param);
+}
+
+
#endif /* OPTIMIZER_COSTS_INCLUDED */
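
The smaller COST_EPS above keeps the "cost1 is better than cost2 only if cost1 + COST_EPS < cost2"
convention while making ties rarer. A small sketch of how that comparison behaves:

// Sketch of the COST_EPS comparison convention: a plan only replaces another
// when it is cheaper by more than the epsilon, which keeps plan choice stable
// when two plans have floating-point-identical costs.
#include <cstdio>

static constexpr double COST_EPS= 0.0000001;

static bool cheaper(double cost1, double cost2)
{
  return cost1 + COST_EPS < cost2;
}

int main()
{
  std::printf("%d\n", cheaper(10.0, 10.0));            // 0: equal plans, keep current
  std::printf("%d\n", cheaper(10.0, 10.0000000001));   // 0: difference below epsilon
  std::printf("%d\n", cheaper(9.9, 10.0));             // 1: genuinely cheaper
  return 0;
}
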
diff --git a/sql/optimizer_defaults.h b/sql/optimizer_defaults.h
new file mode 100644
index 00000000000..8d74bb91cc3
--- /dev/null
+++ b/sql/optimizer_defaults.h
@@ -0,0 +1,183 @@
+#ifndef OPTIMIZER_DEFAULTS_INCLUDED
+#define OPTIMIZER_DEFAULTS_INCLUDED
+/*
+ Copyright (c) 2022, MariaDB AB
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; version 2 of
+ the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+/*
+ This file contains cost constants used by the optimizer
+ All costs should be based on milliseconds (1 cost = 1 ms)
+*/
+
+/* Cost for finding the first key in a key scan */
+#define DEFAULT_KEY_LOOKUP_COST ((double) 0.000435777)
+
+/* Cost of finding a row based on row_ID */
+#define DEFAULT_ROW_LOOKUP_COST ((double) 0.000130839)
+
+/*
+ Cost of finding and copying key and row blocks from the storage
+ engine index cache to an internal cache as part of an index
+ scan. This includes all mutexes that need to be taken to get
+ exclusive access to a page. The number is taken from accessing an
+ existing block in the Aria page cache.
+ Used in handler::scan_time() and handler::keyread_time()
+*/
+#define DEFAULT_INDEX_BLOCK_COPY_COST ((double) 3.56e-05)
+
+/*
+ Cost of copying a row to 'table->record'.
+ Used by scan_time() and rnd_pos_time() methods.
+
+ If this is too small, then table scans will be preferred over 'ref',
+ as with table scans there are no key reads (KEY_LOOKUP_COST) and fewer
+ disk reads, but more record copying and row comparisons. If it's
+ too big then MariaDB will use key lookups even when a table scan is
+ better.
+*/
+#define DEFAULT_ROW_COPY_COST ((double) 0.000060866)
+
+/*
+ Cost of copying the key to 'table->record'
+
+ If this is too small, then, for small tables, index scans will be
+  preferred over 'ref' as with index scans there are fewer disk reads.
+*/
+#define DEFAULT_KEY_COPY_COST ((double) 0.000015685)
+
+/*
+  Cost of finding the next index entry and checking its rowid against the
+  filter. This cost is very low as it's done inside the storage engine.
+ Should be smaller than KEY_COPY_COST.
+ */
+#define DEFAULT_KEY_NEXT_FIND_COST ((double) 0.000082347)
+
+/* Cost of finding the next row when scanning a table */
+#define DEFAULT_ROW_NEXT_FIND_COST ((double) 0.000045916)
+
+/**
+ The cost of executing the WHERE clause as part of any row check.
+ Increasing this would force the optimizer to use row combinations
+  that read fewer rows.
+ The default cost comes from recording times from a simple where clause that
+ compares two fields (date and a double) with constants.
+*/
+#define DEFAULT_WHERE_COST ((double) 3.2e-05)
+
+/* The cost of comparing a key when using range access or sorting */
+#define DEFAULT_KEY_COMPARE_COST 0.000011361
+
+/* Rowid compare is usually just a single memcmp of a short string */
+#define DEFAULT_ROWID_COMPARE_COST 0.000002653
+/* Rowid copy is usually just a single memcpy of a short string */
+#define DEFAULT_ROWID_COPY_COST 0.000002653
+
+/*
+ Average disk seek time on a hard disk is 8-10 ms, which is also
+  about the time to read an IO_SIZE (8192) block.
+
+  A medium SSD reads at about 400MB/second, which gives a time for
+  reading an IO_SIZE block of IO_SIZE/400000000 = 0.0000204 sec = 0.02 ms.
+*/
+#define DEFAULT_DISK_READ_COST ((double) IO_SIZE / 400000000.0 * 1000)
+
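A stand-alone sanity check of the formula above (not server code, and not part
of the patch), assuming IO_SIZE is 8192 as noted earlier in this file; the cost
unit is milliseconds:

  #include <cstdio>

  int main()
  {
    const double io_size= 8192.0;                  /* assumed IO_SIZE */
    /* 400 MB/s sequential read speed, expressed as ms per block */
    const double disk_read_cost= io_size / 400000000.0 * 1000;
    printf("DEFAULT_DISK_READ_COST ~= %.5f ms\n", disk_read_cost); /* ~0.02048 */
    return 0;
  }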
+/*
+  The following is an old comment for hard disks, kept only for
+  historical interest:
+
+ For sequential hard disk seeks the cost formula is:
+ DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip
+
+ The cost of average seek
+ DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK = 10.
+*/
+
+
+/*
+ The table/index cache_miss/total_cache_request ratio.
+  1.0 means that a searched-for key or row will never be in the cache while
+  0.0 means it is always in the cache (and we don't have to do any disk reads).
+
+  According to folklore, one should not have to access disk for more
+  than 20% of the cache requests for MariaDB to run very well.
+ However in practice when we read rows or keys in a query, we will often
+ read the same row over and over again. Because of this we set
+ DEFAULT_DISK_READ_RATIO to 0.20/10 = 0.02.
+
+  Increasing DISK_READ_RATIO will make MariaDB prefer key lookups over
+  table scans, as ROW_COPY_COST and the index copy costs have a larger
+  impact when more rows are examined.
+
+  We are not yet taking cache usage statistics into account, as the
+  EXPLAIN output and the costs for a query would then change between
+  two calls of the same query, which may confuse users (and also make
+  the mtr tests very unpredictable).
+
+ Note that the engine's avg_io_cost() (DEFAULT_DISK_READ_COST by default)
+  is multiplied by this constant!
+*/
+
+#define DEFAULT_DISK_READ_RATIO 0.02
+
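A minimal sketch of how these defaults are meant to combine: the expected IO
cost of a lookup is the engine's avg_io_cost() (DEFAULT_DISK_READ_COST unless
the engine overrides it) multiplied by the disk read ratio. Stand-alone code
with the constants inlined; the server itself takes them from the per-engine
OPTIMIZER_COSTS:

  #include <cstdio>

  int main()
  {
    const double disk_read_cost=  8192.0 / 400000000.0 * 1000; /* DEFAULT_DISK_READ_COST */
    const double disk_read_ratio= 0.02;                        /* DEFAULT_DISK_READ_RATIO */
    const double key_lookup_cost= 0.000435777;                 /* DEFAULT_KEY_LOOKUP_COST */
    /* One key lookup plus the expected (cache-miss) IO cost */
    printf("key lookup incl. expected IO ~= %.6f ms\n",
           key_lookup_cost + disk_read_cost * disk_read_ratio);
    return 0;
  }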
+/*
+ The following costs are mainly to ensure we don't do table and index
+  scans for small tables, like the ones we have in the mtr test suite.
+
+  This is mostly to keep the mtr tests using indexes (as the optimizer would
+  if the tables were large). It also ensures that EXPLAIN shows more key
+  usage for users who are testing queries with small tables at the start
+  of a project.
+  This is probably OK in most cases as the execution time difference
+  between table scans or index scans and key lookups is small when using
+  small tables. It also helps to fill the index cache, which will help
+  mitigate the speed difference.
+*/
+
+/*
+  Extra cost for full table and index scans. Used to prefer key and range
+  access over index and table scans.
+
+ INDEX_SCAN_SETUP_COST (defined in optimizer_costs.h) is half of
+ table_scan_setup_cost to get the optimizer to prefer index scans to table
+  scans, as key copy is faster than row copy and index blocks provide
+ more information in the cache.
+
+  This will also help MyISAM, as with MyISAM table scans have a cost
+ very close to index scans (they are fast but require a read call
+ that we want to avoid even if it's small).
+
+ 10 usec is about 10 MyISAM row lookups with optimizer_disk_read_ratio= 0.02
+*/
+#define DEFAULT_TABLE_SCAN_SETUP_COST 0.01 // 10 usec
+
+/* Extra cost for doing a range scan. Used to prefer 'ref' over range */
+#define MULTI_RANGE_READ_SETUP_COST KEY_LOOKUP_COST
+
+/*
+ Temporary file and temporary table related costs
+ Used with subquery materialization, derived tables etc
+*/
+
+#define TMPFILE_CREATE_COST 0.5 // Cost of creating and deleting files
+#define HEAP_TEMPTABLE_CREATE_COST 0.025 // ms
+/* Cost taken from HEAP_LOOKUP_COST in ha_heap.cc */
+#define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097*1000 + heap_optimizer_costs.row_copy_cost)
+#define DISK_TEMPTABLE_LOOKUP_COST(thd) (tmp_table_optimizer_costs.key_lookup_cost + tmp_table_optimizer_costs.row_lookup_cost + tmp_table_optimizer_costs.row_copy_cost)
+#define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 // 2 tmp tables
+#define DISK_TEMPTABLE_BLOCK_SIZE IO_SIZE
+
+#endif /* OPTIMIZER_DEFAULTS_INCLUDED */
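To give a feel for the temporary-table constants above, a rough materialization
estimate with stand-in numbers; the real HEAP_TEMPTABLE_LOOKUP_COST and
DISK_TEMPTABLE_LOOKUP_COST also pull row and key costs from
heap_optimizer_costs and tmp_table_optimizer_costs at runtime:

  #include <cstdio>

  int main()
  {
    const double create_cost= 0.025;   /* HEAP_TEMPTABLE_CREATE_COST, ms */
    const double lookup_cost= 0.161;   /* stand-in for HEAP_TEMPTABLE_LOOKUP_COST, ms */
    const double executions=  1000;    /* expected number of subquery lookups */
    printf("heap materialization ~= %.3f ms\n",
           create_cost + executions * lookup_cost);
    return 0;
  }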
diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc
index c0f7fe0755a..4f713edb47f 100644
--- a/sql/rowid_filter.cc
+++ b/sql/rowid_filter.cc
@@ -32,7 +32,7 @@ lookup_cost(Rowid_filter_container_type cont_type)
{
switch (cont_type) {
case SORTED_ARRAY_CONTAINER:
- return log(est_elements)*0.01+key_next_find_cost;
+ return log(est_elements) * rowid_compare_cost + base_lookup_cost;
default:
DBUG_ASSERT(0);
return 0;
@@ -125,11 +125,13 @@ void Range_rowid_filter_cost_info::init(Rowid_filter_container_type cont_type,
key_no= idx;
est_elements= (ulonglong) table->opt_range[key_no].rows;
cost_of_building_range_filter= build_cost(container_type);
+
where_cost= tab->in_use->variables.optimizer_where_cost;
- key_next_find_cost= tab->in_use->variables.optimizer_key_next_find_cost;
+ base_lookup_cost= tab->file->ROW_NEXT_FIND_COST;
+ rowid_compare_cost= tab->file->ROWID_COMPARE_COST;
selectivity= est_elements/((double) table->stat_records());
gain= avg_access_and_eval_gain_per_row(container_type,
- tab->file->optimizer_cache_cost);
+ tab->file->ROW_LOOKUP_COST);
if (gain > 0)
cross_x= cost_of_building_range_filter/gain;
else
@@ -147,15 +149,18 @@ double
Range_rowid_filter_cost_info::build_cost(Rowid_filter_container_type cont_type)
{
double cost;
+ OPTIMIZER_COSTS *costs= &table->s->optimizer_costs;
DBUG_ASSERT(table->opt_range_keys.is_set(key_no));
- cost= table->opt_range[key_no].index_only_fetch_cost(table->in_use);
+ /* Cost of fetching keys */
+ cost= table->opt_range[key_no].index_only_fetch_cost(table);
switch (cont_type) {
-
case SORTED_ARRAY_CONTAINER:
- cost+= ARRAY_WRITE_COST * est_elements; /* cost filling the container */
- cost+= ARRAY_SORT_C * est_elements * log(est_elements); /* sorting cost */
+ /* Add cost of filling container and cost of sorting */
+ cost= (est_elements *
+ (costs->rowid_copy_cost + // Copying rowid
+ costs->rowid_cmp_cost * log2(est_elements))); // Sort
break;
default:
DBUG_ASSERT(0);
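A rough check of the reworked build_cost() formula above, using the default
rowid costs from optimizer_defaults.h; illustrative only, as the server uses
the per-engine values in table->s->optimizer_costs:

  #include <cmath>
  #include <cstdio>

  int main()
  {
    const double rowid_copy_cost= 0.000002653;   /* DEFAULT_ROWID_COPY_COST */
    const double rowid_cmp_cost=  0.000002653;   /* DEFAULT_ROWID_COMPARE_COST */
    const double est_elements= 1000;
    /* est_elements * (copy + cmp * log2(est_elements)), as in build_cost() */
    const double cost= est_elements *
      (rowid_copy_cost + rowid_cmp_cost * std::log2(est_elements));
    printf("filter build cost for 1000 rowids ~= %.6f ms\n", cost); /* ~0.029 */
    return 0;
  }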
diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h
index 866b52b156b..46664c18faa 100644
--- a/sql/rowid_filter.h
+++ b/sql/rowid_filter.h
@@ -143,20 +143,6 @@ class SQL_SELECT;
class Rowid_filter_container;
class Range_rowid_filter_cost_info;
-/*
- Cost to write rowid into array. Assume inserting 1000 row id's into the
- array has same cost as a 'disk io' or key fetch
-*/
-#define ARRAY_WRITE_COST 0.001
-/*
- Factor used to calculate cost of sorting rowids in array
- This is multiplied by 'elements * log(elements)', so this factor
- has a very high cost weight!
- A value of 0.001 will have 200 rows have a cost of 1.05 and
- 1000 rows a cost of 6.90.
-*/
-#define ARRAY_SORT_C 0.001
-
typedef enum
{
SORTED_ARRAY_CONTAINER,
@@ -395,7 +381,8 @@ class Range_rowid_filter_cost_info final: public Sql_alloc
/* The index whose range scan would be used to build the range filter */
uint key_no;
double cost_of_building_range_filter;
- double where_cost, key_next_find_cost;
+ double where_cost, base_lookup_cost, rowid_compare_cost;
+
/*
(gain*row_combinations)-cost_of_building_range_filter yields the gain of
the filter for 'row_combinations' key tuples of the index key_no
diff --git a/sql/set_var.cc b/sql/set_var.cc
index 8cb5fcd4870..274ee07f07d 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -310,7 +310,13 @@ do { \
case SHOW_HA_ROWS: do_num_val (ha_rows,CMD);
#define case_for_double(CMD) \
- case SHOW_DOUBLE: do_num_val (double,CMD)
+ case SHOW_DOUBLE: do_num_val (double,CMD); \
+ case SHOW_OPTIMIZER_COST: \
+ { \
+ double val= ((*(double*) value) == OPTIMIZER_COST_UNDEF ? OPTIMIZER_COST_UNDEF : \
+ (*(double*) value) * 1000); \
+ CMD; \
+ } while (0)
#define case_get_string_as_lex_string \
case SHOW_CHAR: \
diff --git a/sql/set_var.h b/sql/set_var.h
index 570703a8222..38a395adf0f 100644
--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -84,7 +84,7 @@ protected:
typedef bool (*on_update_function)(sys_var *self, THD *thd, enum_var_type type);
int flags; ///< or'ed flag_enum values
- const SHOW_TYPE show_val_type; ///< what value_ptr() returns for sql_show.cc
+ SHOW_TYPE show_val_type; ///< what value_ptr() returns for sql_show.cc
PolyLock *guard; ///< *second* lock that protects the variable
ptrdiff_t offset; ///< offset to the value from global_system_variables
on_check_function on_check;
diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h
index 02dc8198c7c..61b3df2d086 100644
--- a/sql/sql_bitmap.h
+++ b/sql/sql_bitmap.h
@@ -270,13 +270,21 @@ public:
{
return buffer[0];
}
- uint bits_set()
+ uint bits_set() const
{
uint res= 0;
for (size_t i= 0; i < ARRAY_ELEMENTS; i++)
- res += my_count_bits(buffer[i]);
+ if (buffer[i])
+ res+= my_count_bits(buffer[i]);
return res;
}
+ uint find_first_bit() const
+ {
+ for (size_t i= 0; i < ARRAY_ELEMENTS; i++)
+ if (buffer[i])
+ return (uint)i*BITS_PER_ELEMENT + my_find_first_bit(buffer[i]);
+ return width;
+ }
class Iterator
{
const Bitmap& map;
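The new Bitmap::find_first_bit() above follows the usual word-scan pattern. A
generic stand-alone version of the same idea, using a GCC/Clang builtin where
the server uses my_find_first_bit() (assumed here to behave the same way):

  #include <cstdint>
  #include <cstdio>

  /* Return the index of the lowest set bit, or 'width' if no bit is set */
  static unsigned find_first_bit(const uint64_t *buf, size_t n_words, unsigned width)
  {
    for (size_t i= 0; i < n_words; i++)
      if (buf[i])
        return (unsigned) (i * 64) + __builtin_ctzll(buf[i]);
    return width;
  }

  int main()
  {
    uint64_t words[2]= { 0, 1ULL << 5 };   /* lowest set bit is bit 69 */
    printf("%u\n", find_first_bit(words, 2, 128));
    return 0;
  }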
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 4074481880c..9b064b617cb 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1236,7 +1236,6 @@ void THD::init()
*/
variables.pseudo_thread_id= thread_id;
variables.default_master_connection.str= default_master_connection_buff;
- optimizer_cache_hit_ratio= cache_hit_ratio(variables.optimizer_cache_hit_ratio);
::strmake(default_master_connection_buff,
global_system_variables.default_master_connection.str,
variables.default_master_connection.length);
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 845b1bad024..26d64c28fe2 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -692,9 +692,7 @@ typedef struct system_variables
ulonglong slave_skip_counter;
ulonglong max_relay_log_size;
- double optimizer_index_block_copy_cost, optimizer_key_next_find_cost;
- double optimizer_row_copy_cost, optimizer_key_copy_cost;
- double optimizer_where_cost, optimizer_key_cmp_cost;
+ double optimizer_where_cost, optimizer_scan_setup_cost;
double long_query_time_double, max_statement_time_double;
double sample_percentage;
@@ -793,7 +791,6 @@ typedef struct system_variables
uint group_concat_max_len;
uint eq_range_index_dive_limit;
- uint optimizer_cache_hit_ratio; // Stored in handler::optimizer_cache_cost
uint idle_transaction_timeout;
uint idle_readonly_transaction_timeout;
uint idle_write_transaction_timeout;
@@ -831,7 +828,6 @@ typedef struct system_variables
my_bool session_track_user_variables;
#endif // USER_VAR_TRACKING
my_bool tcp_nodelay;
-
plugin_ref table_plugin;
plugin_ref tmp_table_plugin;
plugin_ref enforced_table_plugin;
@@ -2677,7 +2673,6 @@ public:
struct system_status_var org_status_var; // For user statistics
struct system_status_var *initial_status_var; /* used by show status */
THR_LOCK_INFO lock_info; // Locking info of this thread
- double optimizer_cache_hit_ratio; // From optimizer_cache_hit_ratio
/**
Protects THD data accessed from other threads:
@@ -7426,6 +7421,13 @@ inline void handler::decrement_statistics(ulong SSV::*offset) const
status_var_decrement(table->in_use->status_var.*offset);
}
+/* Update references in the handler to the table */
+
+inline void handler::set_table(TABLE* table_arg)
+{
+ table= table_arg;
+ costs= &table_arg->s->optimizer_costs;
+}
inline int handler::ha_ft_read(uchar *buf)
{
diff --git a/sql/sql_const.h b/sql/sql_const.h
index 98803989f51..11aadd11174 100644
--- a/sql/sql_const.h
+++ b/sql/sql_const.h
@@ -121,11 +121,11 @@
/*
This is used when reading large blocks, sequential read.
- We assume that reading this much will be the same cost as 1 seek / fetching
- one row from the storage engine.
+ We assume that reading this much will be roughly the same cost as 1
+ seek / fetching one row from the storage engine.
+ Cost of one read of DISK_CHUNK_SIZE is DISK_SEEK_BASE_COST (ms).
*/
#define DISK_CHUNK_SIZE (uint) (65536) /* Size of diskbuffer for tmpfiles */
-#define TMPFILE_CREATE_COST 2.0 /* Creating and deleting tmp file */
#define FRM_VER_TRUE_VARCHAR (FRM_VER+4) /* 10 */
#define FRM_VER_EXPRESSSIONS (FRM_VER+5) /* 11 */
@@ -204,8 +204,14 @@
#define MIN_ROWS_TO_USE_TABLE_CACHE 100
#define MIN_ROWS_TO_USE_BULK_INSERT 100
+/*
+  The lower bound of accepted rows when using a filter.
+  This is used to ensure that filters are not too aggressive.
+*/
+#define MIN_ROWS_AFTER_FILTERING 1.0
+
/**
- Number of rows in a reference table when refereed through a not unique key.
+  Number of rows in a reference table when referred to through a non-unique key.
This value is only used when we don't know anything about the key
distribution.
*/
diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc
index ede486fc297..9f907c9ed2c 100644
--- a/sql/sql_explain.cc
+++ b/sql/sql_explain.cc
@@ -1368,10 +1368,12 @@ double Explain_table_access::get_r_filtered()
}
-int Explain_table_access::print_explain(select_result_sink *output, uint8 explain_flags,
+int Explain_table_access::print_explain(select_result_sink *output,
+ uint8 explain_flags,
bool is_analyze,
uint select_id, const char *select_type,
- bool using_temporary, bool using_filesort)
+ bool using_temporary,
+ bool using_filesort)
{
THD *thd= output->thd; // note: for SHOW EXPLAIN, this is target thd.
MEM_ROOT *mem_root= thd->mem_root;
@@ -1999,6 +2001,9 @@ void Explain_table_access::print_explain_json(Explain_query *query,
writer->add_double(jbuf_tracker.get_filtered_after_where()*100.0);
else
writer->add_null();
+
+ writer->add_member("r_unpack_time_ms");
+ writer->add_double(jbuf_unpack_tracker.get_time_ms());
}
}
diff --git a/sql/sql_explain.h b/sql/sql_explain.h
index 38c5c3e6595..42a1c360e5b 100644
--- a/sql/sql_explain.h
+++ b/sql/sql_explain.h
@@ -753,7 +753,7 @@ public:
class Explain_table_access : public Sql_alloc
{
public:
- Explain_table_access(MEM_ROOT *root) :
+ Explain_table_access(MEM_ROOT *root, bool timed) :
derived_select_number(0),
non_merged_sjm_number(0),
extra_tags(root),
@@ -766,6 +766,7 @@ public:
pushed_index_cond(NULL),
sjm_nest(NULL),
pre_join_sort(NULL),
+ jbuf_unpack_tracker(timed),
rowid_filter(NULL)
{}
~Explain_table_access() { delete sjm_nest; }
@@ -874,6 +875,7 @@ public:
Gap_time_tracker extra_time_tracker;
Table_access_tracker jbuf_tracker;
+ Time_and_counter_tracker jbuf_unpack_tracker;
Explain_rowid_filter *rowid_filter;
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
index 1347e38753d..5fcfe0e0e0a 100644
--- a/sql/sql_join_cache.cc
+++ b/sql/sql_join_cache.cc
@@ -1600,6 +1600,7 @@ bool JOIN_CACHE::put_record()
bool JOIN_CACHE::get_record()
{
bool res;
+ ANALYZE_START_TRACKING(thd(), join_tab->jbuf_unpack_tracker);
uchar *prev_rec_ptr= 0;
if (with_length)
pos+= size_of_rec_len;
@@ -1615,6 +1616,7 @@ bool JOIN_CACHE::get_record()
if (prev_cache)
prev_cache->get_record_by_pos(prev_rec_ptr);
}
+ ANALYZE_STOP_TRACKING(thd(), join_tab->jbuf_unpack_tracker);
return res;
}
diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h
index d4df8c6468f..df5cd37c3c6 100644
--- a/sql/sql_plugin.h
+++ b/sql/sql_plugin.h
@@ -24,6 +24,7 @@
#define SHOW_always_last SHOW_KEY_CACHE_LONG, \
SHOW_HAVE, SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS, \
SHOW_LONG_NOFLUSH, SHOW_LEX_STRING, SHOW_ATOMIC_COUNTER_UINT32_T, \
+ SHOW_OPTIMIZER_COST, \
/* SHOW_*_STATUS must be at the end, SHOW_LONG_STATUS being first */ \
SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, SHOW_LONGLONG_STATUS, \
SHOW_UINT32_STATUS
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 0532c6c000c..9a1dfd83508 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -47,6 +47,7 @@
// print_sjm, print_plan, TEST_join
#include "records.h" // init_read_record, end_read_record
#include "filesort.h" // filesort_free_buffers
+#include "filesort_utils.h" // get_qsort_sort_cost
#include "sql_union.h" // mysql_union
#include "opt_subselect.h"
#include "sql_derived.h"
@@ -68,6 +69,7 @@
#include "my_json_writer.h"
#include "opt_trace.h"
#include "create_tmp_table.h"
+#include "optimizer_defaults.h"
/*
A key part number that means we're using a fulltext scan.
@@ -99,14 +101,7 @@
#define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) < 1.0000001)
-#define double_to_rows(A) ((A) >= ((double)HA_POS_ERROR) ? HA_POS_ERROR : (ha_rows) (A))
-
-/* Cost for reading a row through an index */
-struct INDEX_READ_COST
-{
- double read_cost;
- double index_only_cost;
-};
+#define double_to_rows(A) ((A) >= ((double)HA_ROWS_MAX) ? HA_ROWS_MAX : (ha_rows) (A))
const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
"MAYBE_REF","ALL","range","index","fulltext",
@@ -257,7 +252,6 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
bool is_top_and_level);
static Item* part_of_refkey(TABLE *form,Field *field);
-uint find_shortest_key(TABLE *table, const key_map *usable_keys);
static bool test_if_cheaper_ordering(const JOIN_TAB *tab,
ORDER *order, TABLE *table,
key_map usable_keys, int key,
@@ -331,7 +325,8 @@ static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *,
List<Item> &, List<Item> &, bool, bool, bool);
static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
- table_map rem_tables);
+ table_map rem_tables,
+ double *records_out);
void set_postjoin_aggr_write_func(JOIN_TAB *tab);
static Item **get_sargable_cond(JOIN *join, TABLE *table);
@@ -433,7 +428,7 @@ bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value)
POSITION::POSITION()
{
table= 0;
- records_read= cond_selectivity= read_time= records_out= 0.0;
+ records_read= cond_selectivity= read_time= records_out= records_init= 0.0;
prefix_record_count= 0.0;
key= 0;
forced_index= 0;
@@ -1896,6 +1891,13 @@ int JOIN::optimize()
res= build_explain();
optimization_state= JOIN::OPTIMIZATION_DONE;
}
+
+ /*
+ Store the cost of this query into a user variable
+ TODO: calculate a correct cost for a query with subqueries and UNIONs.
+ */
+ if (select_lex->select_number == 1)
+ thd->status_var.last_query_cost= best_read;
return res;
}
@@ -2045,6 +2047,7 @@ JOIN::optimize_inner()
{
DBUG_ENTER("JOIN::optimize_inner");
subq_exit_fl= false;
+ best_read= 0.0;
DEBUG_SYNC(thd, "before_join_optimize");
THD_STAGE_INFO(thd, stage_optimizing);
@@ -3588,7 +3591,7 @@ bool JOIN::make_aggr_tables_info()
TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param,
all_fields,
NULL, distinct,
- TRUE, select_options, HA_POS_ERROR,
+ TRUE, select_options, HA_ROWS_MAX,
&empty_clex_str, !need_tmp,
keep_row_order);
if (!table)
@@ -4233,7 +4236,7 @@ bool
JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
{
tab->filesort=
- new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->keep_current_rowid,
+ new (thd->mem_root) Filesort(order, HA_ROWS_MAX, tab->keep_current_rowid,
tab->select);
if (!tab->filesort)
return true;
@@ -5270,7 +5273,6 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
DYNAMIC_ARRAY *keyuse_array)
{
int error= 0;
- TABLE *UNINIT_VAR(table); /* inited in all loops */
uint i,table_count,const_count,key;
uint sort_space;
table_map found_const_table_map, all_table_map;
@@ -5331,8 +5333,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (s= stat, i= 0; (tables= ti++); s++, i++)
{
TABLE_LIST *embedding= tables->embedding;
+ TABLE *table= tables->table;
stat_vector[i]=s;
- table_vector[i]=s->table=table=tables->table;
+ table_vector[i]= s->table= table;
s->tab_list= tables;
table->pos_in_table_list= tables;
error= tables->fetch_number_of_rows();
@@ -5465,7 +5468,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (s= stat ; s < stat_end ; s++)
{
- table= s->table;
+ TABLE *table= s->table;
for (JOIN_TAB *t= stat ; t < stat_end ; t++)
{
if (t->dependent & table->map)
@@ -5569,7 +5572,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
{
- table=s->table;
+ TABLE *table= s->table;
if (table->is_filled_at_execution())
continue;
@@ -5622,7 +5625,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
(*s->on_expr_ref)->is_expensive()))
{ // system table
int tmp= 0;
- s->type=JT_SYSTEM;
+ s->type= JT_SYSTEM;
join->const_table_map|=table->map;
set_position(join,const_count++,s,(KEYUSE*) 0);
if ((tmp= join_read_const_table(join->thd, s,
@@ -5825,19 +5828,20 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->startup_cost= 0;
if (s->type == JT_SYSTEM || s->type == JT_CONST)
{
-
Json_writer_object table_records(thd);
- /* Only one matching row */
- s->found_records= s->records= 1;
- s->records_out= 1.0;
+ ha_rows records= 1;
+ if (s->type == JT_SYSTEM || s->table->file->stats.records == 0)
+ records= s->table->file->stats.records;
+ /* zero or one matching row */
+ s->records= s->found_records= records;
+ s->records_init= s->records_out= rows2double(records);
s->read_time=1.0;
s->worst_seeks=1.0;
- table_records.add_table_name(s)
- .add("rows", s->found_records)
- .add("cost", s->read_time)
- .add("table_type", s->type == JT_CONST ?
- "const" :
- "system");
+ table_records.add_table_name(s).
+ add("rows", s->found_records).
+ add("cost", s->read_time).
+ add("table_type", s->type == JT_CONST ?
+ "const" : "system");
continue;
}
/*
@@ -5889,7 +5893,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->table->pos_in_table_list->is_materialized_derived())) // (3)
{
bool impossible_range= FALSE;
- ha_rows records= HA_POS_ERROR;
+ ha_rows records= HA_ROWS_MAX;
SQL_SELECT *select= 0;
Item **sargable_cond= NULL;
if (!s->const_keys.is_clear_all())
@@ -5956,6 +5960,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
}
else
{
+ double records= 1;
join->const_table_map|= s->table->map;
set_position(join,const_count++,s,(KEYUSE*) 0);
s->type= JT_CONST;
@@ -5966,7 +5971,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->info= ET_IMPOSSIBLE_ON_CONDITION;
found_const_table_map|= s->table->map;
mark_as_null_row(s->table); // All fields are NULL
+ records= 0;
}
+ s->records_init= s->records_out= records;
+ s->found_records= s->records= (ha_rows)records;
}
}
if (records != HA_POS_ERROR)
@@ -6055,7 +6063,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
for (i= 0; i < join->table_count ; i++)
if (double rr= join->best_positions[i].records_read)
records= COST_MULT(records, rr);
- rows= records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records;
+ rows= double_to_rows(records);
set_if_smaller(rows, unit->lim.get_select_limit());
join->select_lex->increase_derived_records(rows);
}
@@ -7697,8 +7705,9 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
{
join->positions[idx].table= table;
join->positions[idx].key=key;
- join->positions[idx].records_read=1.0; /* This is a const table */
- join->positions[idx].records_out=1.0; /* This is a const table */
+ join->positions[idx].records_read=1.0; /* This is a const table */
+ join->positions[idx].records_out=1.0; /* This is a const table */
+ join->positions[idx].records_init=1.0; /* This is a const table */
join->positions[idx].cond_selectivity= 1.0;
join->positions[idx].ref_depend_map= 0;
@@ -7751,7 +7760,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
TODO:
Extend with_found_constraint' to be set for a top level expression of type
X=Y where X and Y has fields from current table and at least one field from
- one o more previous tables.
+ one or more previous tables.
@see also
table_after_join_selectivity() produces selectivity of condition that is
@@ -7851,37 +7860,29 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
DBUG_ENTER("cost_for_index_read");
rows_adjusted= MY_MIN(rows2double(records), (double) thd->variables.max_seeks_for_key);
+ set_if_bigger(rows_adjusted, 1);
+
#ifdef OLD_CODE_LIMITED_SEEKS
set_if_smaller(rows_adjusted, worst_seeks);
#endif
if (file->is_clustering_key(key))
{
- cost.index_only_cost= file->ha_read_time(key, 1, (ha_rows)rows_adjusted);
- /*
- Same computation as in ha_read_and_copy_time()
- We do it explicitely here as we want to use the original value of
- records to compute the record copy cost.
- */
- cost.read_cost= (cost.index_only_cost +
- rows2double(records) * ROW_COPY_COST_THD(thd));
+ cost.index_only_cost=
+ file->ha_keyread_clustered_and_copy_time(key, 1, rows_adjusted, 0);
+ /* There is no 'index_only_read' with a clustered index */
+ cost.read_cost= cost.index_only_cost;
}
else if (table->covering_keys.is_set(key) && !table->no_keyread)
{
- cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows)rows_adjusted);
+ cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0);
/* Same computation as in ha_keyread_and_copy_time() */
cost.read_cost= (cost.index_only_cost +
- rows2double(records) * KEY_COPY_COST_THD(thd));
+ rows2double(records) * file->KEY_COPY_COST);
}
else
{
- cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows) rows_adjusted);
- /*
- Note that ha_read_time() + ..ROW_COPY_COST should be same
- as ha_rnd_pos_time().
- */
- cost.read_cost= (cost.index_only_cost +
- file->ha_read_time(key, 0, (ha_rows)rows_adjusted) +
- rows2double(records) * ROW_COPY_COST_THD(thd));
+ cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0);
+ cost.read_cost= (cost.index_only_cost + file->ha_rnd_pos_time(records));
}
DBUG_PRINT("statistics", ("index_cost: %.3f full_cost: %.3f",
cost.index_only_cost, cost.read_cost));
@@ -7950,8 +7951,8 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
read even if selectivity (and thus new_records) would be very low.
*/
new_cost= (MY_MAX(cost_of_accepted_rows,
- ranges * KEY_LOOKUP_COST * io_cost *
- table->file->optimizer_cache_cost) +
+ ranges * table->file->KEY_LOOKUP_COST +
+ ranges * io_cost * table->file->DISK_READ_RATIO) +
cost_of_rejected_rows + filter_lookup_cost);
new_total_cost= ((new_cost + new_records * WHERE_COST_THD(thd)) *
prev_records + filter_startup_cost);
@@ -8015,6 +8016,24 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg,
None
*/
+struct best_plan
+{
+ double cost; // Smallest cost found
+ double records; // Old 'Records'
+ double records_read; // Records accessed
+ double records_after_filter; // Records_read + filter
+ double records_out; // Smallest record count seen
+ Range_rowid_filter_cost_info *filter; // Best filter
+ KEYUSE *key; // Best key
+ SplM_plan_info *spl_plan;
+ table_map ref_depends_map;
+ enum join_type type;
+ uint forced_index;
+ uint max_key_part;
+ bool uses_jbuf;
+};
+
+
void
best_access_path(JOIN *join,
JOIN_TAB *s,
@@ -8030,14 +8049,7 @@ best_access_path(JOIN *join,
uint use_cond_selectivity=
thd->variables.optimizer_use_condition_selectivity;
TABLE *table= s->table;
- KEYUSE *best_key= 0;
- uint best_max_key_part= 0;
- uint best_forced_index= MAX_KEY, forced_index= MAX_KEY;
my_bool found_constraint= 0;
- double best_cost= DBL_MAX;
- double records= DBL_MAX;
- double records_out= table->stat_records() * table->cond_selectivity;
- table_map best_ref_depends_map= 0;
/*
key_dependent is 0 if all key parts could be used or if there was an
EQ_REF table found (which uses all key parts). In other words, we cannot
@@ -8045,18 +8057,29 @@ best_access_path(JOIN *join,
Otherwise it's a bitmap of tables that could improve key usage.
*/
table_map key_dependent= 0;
- Range_rowid_filter_cost_info *best_filter= 0;
double tmp;
ha_rows rec;
- bool best_uses_jbuf= FALSE;
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
KEYUSE *hj_start_key= 0;
- SplM_plan_info *spl_plan= 0;
- enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN;
Loose_scan_opt loose_scan_opt;
+ struct best_plan best;
Json_writer_object trace_wrapper(thd, "best_access_path");
DBUG_ENTER("best_access_path");
+ best.cost= DBL_MAX;
+ best.records= DBL_MAX;
+ best.records_read= DBL_MAX;
+ best.records_after_filter= DBL_MAX;
+ best.records_out= table->stat_records() * table->cond_selectivity;
+ best.filter= 0;
+ best.key= 0;
+ best.max_key_part= 0;
+ best.type= JT_UNKNOWN;
+ best.forced_index= MAX_KEY;
+ best.ref_depends_map= 0;
+ best.uses_jbuf= FALSE;
+ best.spl_plan= 0;
+
disable_jbuf= disable_jbuf || idx == join->const_tables;
trace_wrapper.add_table_name(s);
@@ -8066,7 +8089,7 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ best.spl_plan= s->choose_best_splitting(record_count, remaining_tables);
if (unlikely(thd->trace_started()))
{
@@ -8077,10 +8100,10 @@ best_access_path(JOIN *join,
if (s->keyuse)
{ /* Use key if possible */
- KEYUSE *keyuse;
- KEYUSE *start_key=0;
- double best_records= DBL_MAX, index_only_cost= DBL_MAX;
+ KEYUSE *keyuse, *start_key= 0;
+ double index_only_cost= DBL_MAX;
uint max_key_part=0;
+ enum join_type type= JT_UNKNOWN;
/* Test how we can use keys */
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
@@ -8102,7 +8125,7 @@ best_access_path(JOIN *join,
key_part_map ref_or_null_part= 0;
key_part_map all_parts= 0;
double startup_cost= s->startup_cost;
- double records_after_filter;
+ double records_after_filter, records_best_filter, records;
Range_rowid_filter_cost_info *filter= 0;
if (is_hash_join_key_no(key))
@@ -8333,7 +8356,6 @@ best_access_path(JOIN *join,
((double) (table->s->max_key_length-keyinfo->key_length) /
(double) table->s->max_key_length)));
set_if_smaller(records, (double)s->records);
- set_if_smaller(records_out, records);
if (records < 2.0)
records=2.0; /* Can't be as good as a unique */
}
@@ -8400,6 +8422,8 @@ best_access_path(JOIN *join,
(!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) ||
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
{
+ double extra_cost= 0;
+
max_key_part= max_part_bit(found_part);
/*
ReuseRangeEstimateForRef-3:
@@ -8524,7 +8548,7 @@ best_access_path(JOIN *join,
a*keyinfo->user_defined_key_parts - rec_per_key)/
(keyinfo->user_defined_key_parts-1);
else
- records= a;
+ records= rows2double(s->records);
set_if_bigger(records, MIN_ROWS_AFTER_FILTERING);
}
}
@@ -8533,6 +8557,7 @@ best_access_path(JOIN *join,
{
/* We need to do two key searches to find row */
records *= 2.0;
+ extra_cost= s->table->file->KEY_LOOKUP_COST;
}
/*
@@ -8562,13 +8587,14 @@ best_access_path(JOIN *join,
}
/* Limit the number of matched rows */
+ set_if_smaller(records, (double) s->records);
tmp= records;
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
INDEX_READ_COST cost= cost_for_index_read(thd, table, key,
(ha_rows) tmp,
(ha_rows) s->worst_seeks);
tmp= cost.read_cost;
- index_only_cost= cost.index_only_cost;
+ index_only_cost= cost.index_only_cost+extra_cost;
}
else
{
@@ -8590,7 +8616,7 @@ best_access_path(JOIN *join,
if (records == DBL_MAX) // Key not usable
continue;
- records_after_filter= records;
+ records_best_filter= records_after_filter= records;
/*
Check that start_key->key can be used for index access
@@ -8604,7 +8630,8 @@ best_access_path(JOIN *join,
tmp,
index_only_cost,
record_count,
- &records_out);
+ &records_best_filter);
+ set_if_smaller(best.records_out, records_best_filter);
if (filter)
filter= filter->apply_filter(thd, table, &tmp, &records_after_filter,
&startup_cost,
@@ -8625,20 +8652,31 @@ best_access_path(JOIN *join,
The COST_EPS is here to ensure we use the first key if there are
two 'identical keys' that could be used.
*/
- if (tmp + COST_EPS < best_cost)
+ if (tmp + COST_EPS < best.cost)
{
trace_access_idx.add("chosen", true);
- best_cost= tmp;
+ best.cost= tmp;
/*
We use 'records' instead of 'records_after_filter' here as we want
to have EXPLAIN print the number of rows found by the key access.
*/
- best_records= records; // Records before filter!
- best_key= start_key;
- best_max_key_part= max_key_part;
- best_ref_depends_map= found_ref;
- best_filter= filter;
- best_type= type;
+ best.records= records; // Records before filter!
+ best.records_read= records;
+ /*
+ If we are using 'use_cond_selectivity > 1' then
+            table_after_join_selectivity() may take into account other
+            filters than the one currently used, so we have to use
+            records_after_filter. If 'use_cond_selectivity <= 1' then we
+ can use information from the best filter.
+ */
+ best.records_after_filter= ((use_cond_selectivity > 1) ?
+ records_after_filter :
+ records_best_filter);
+ best.key= start_key;
+ best.max_key_part= max_key_part;
+ best.ref_depends_map= found_ref;
+ best.filter= filter;
+ best.type= type;
}
else if (unlikely(thd->trace_started()))
{
@@ -8646,9 +8684,8 @@ best_access_path(JOIN *join,
add("chosen", false).
add("cause", cause ? cause : "cost");
}
- set_if_smaller(records_out, records);
+ set_if_smaller(best.records_out, records);
} /* for each key */
- records= best_records;
}
else
{
@@ -8671,7 +8708,7 @@ best_access_path(JOIN *join,
/* Add dependency for sub queries */
key_dependent|= s->embedded_dependent;
- } /* if (s->keyuse) */
+ } /* if (s->keyuse) */
/* Check that s->key_dependent contains all used_tables found in s->keyuse */
@@ -8687,7 +8724,7 @@ best_access_path(JOIN *join,
(1) s is inner table of semi-join -> join cache is allowed for semijoins
(2) s is inner table of outer join -> join cache is allowed for outer joins
*/
- if (idx > join->const_tables && best_key == 0 &&
+ if (idx > join->const_tables && best.key == 0 &&
(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
join->max_allowed_join_cache_level > 2 &&
!bitmap_is_clear_all(eq_join_set) && !disable_jbuf &&
@@ -8696,11 +8733,11 @@ best_access_path(JOIN *join,
(!(table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2)
{
- double refills, cmp_time;
+ double refills, row_copy_cost, cmp_time;
/* Estimate the cost of the hash join access to the table */
- double rnd_records= matching_candidates_in_table(s, found_constraint,
+ double rnd_records= matching_candidates_in_table(s, 0,
use_cond_selectivity);
- set_if_smaller(records_out, rnd_records);
+ set_if_smaller(best.records_out, rnd_records);
/*
The following cost calculation is identical to the cost calculation for
@@ -8729,18 +8766,22 @@ best_access_path(JOIN *join,
We assume here that, thanks to the hash, we don't have to compare all
row combinations, only a HASH_FANOUT (10%) rows in the cache.
*/
- cmp_time= (rnd_records * record_count * HASH_FANOUT *
- (ROW_COPY_COST_THD(thd) * JOIN_CACHE_ROW_COPY_COST_FACTOR +
+ row_copy_cost= (ROW_COPY_COST_THD(thd) * 2 *
+ JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
+ cmp_time= (record_count * row_copy_cost +
+ rnd_records * record_count * HASH_FANOUT *
+ ((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
tmp= COST_ADD(tmp, cmp_time);
- best_cost= tmp;
- records= rnd_records;
- best_key= hj_start_key;
- best_ref_depends_map= 0;
- best_uses_jbuf= TRUE;
- best_filter= 0;
- best_type= JT_HASH;
+ best.cost= tmp;
+ best.records_read= best.records_after_filter= rows2double(s->records);
+ best.records= rnd_records;
+ best.key= hj_start_key;
+ best.ref_depends_map= 0;
+ best.uses_jbuf= TRUE;
+ best.filter= 0;
+ best.type= JT_HASH;
Json_writer_object trace_access_hash(thd);
if (unlikely(trace_access_hash.trace_started()))
trace_access_hash.
@@ -8748,7 +8789,7 @@ best_access_path(JOIN *join,
add("index", "hj-key").
add("rows", rnd_records).
add("refills", refills).
- add("cost", best_cost).
+ add("cost", best.cost).
add("chosen", true);
}
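The reworked hash-join buffer costing above charges one row copy per cached row
combination, and then, for the HASH_FANOUT fraction of candidate matches, one
row copy per earlier cached table plus a WHERE evaluation. A numeric sketch
with made-up inputs; HASH_FANOUT is assumed to be 0.1 (the "10%" mentioned in
the comment) and JOIN_CACHE_ROW_COPY_COST_FACTOR is assumed to be 1.0:

  #include <cstdio>

  int main()
  {
    const double row_copy_cost= 0.000060866 * 2 * 1.0; /* ROW_COPY_COST * 2 * factor */
    const double where_cost=    0.000032;              /* DEFAULT_WHERE_COST */
    const double hash_fanout=   0.1;                   /* assumed HASH_FANOUT */
    const double record_count=  10000;                 /* row combinations in the cache */
    const double rnd_records=   500;                   /* matching candidates in this table */
    const double prev_tables=   2;                     /* idx - join->const_tables */
    const double cmp_time= record_count * row_copy_cost +
      rnd_records * record_count * hash_fanout *
      (prev_tables * row_copy_cost + where_cost);
    printf("cmp_time ~= %.3f ms\n", cmp_time);
    return 0;
  }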
@@ -8788,21 +8829,25 @@ best_access_path(JOIN *join,
be used for cases with small datasets, which is annoying.
*/
Json_writer_object trace_access_scan(thd);
- if ((records >= s->found_records || best_cost > s->read_time) && // (1)
- !(best_key && best_key->key == MAX_KEY) && // (2)
+ if ((best.records_read >= s->found_records ||
+ best.cost > s->read_time) && // (1)
+ !(best.key && best.key->key == MAX_KEY) && // (2)
!(s->quick &&
s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2)
- best_key && s->quick->index == best_key->key && // (2)
- best_max_key_part >= table->opt_range[best_key->key].key_parts) &&// (2)
- !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
- !table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3)
- !(table->force_index_join && best_key && !s->quick) && // (4)
- !(best_key && table->pos_in_table_list->jtbm_subselect)) // (5)
+ best.key && s->quick->index == best.key->key && // (2)
+ best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2)
+ !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
+ !table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3)
+ !(table->force_index_join && best.key && !s->quick) && // (4)
+ !(best.key && table->pos_in_table_list->jtbm_subselect)) // (5)
{ // Check full join
- double rnd_records, records_after_filter, org_records;
+ double records_after_filter, org_records;
+ double records_best_filter;
Range_rowid_filter_cost_info *filter= 0;
double startup_cost= s->startup_cost;
const char *scan_type= "";
+ enum join_type type;
+ uint forced_index= MAX_KEY;
/*
Range optimizer never proposes a RANGE if it isn't better
@@ -8832,7 +8877,8 @@ best_access_path(JOIN *join,
This is done to make records found comparable to what we get with
'ref' access.
*/
- org_records= records_after_filter= rnd_records= rows2double(s->found_records);
+ org_records= records_after_filter= rows2double(s->found_records);
+ records_best_filter= org_records;
if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
{
@@ -8850,11 +8896,13 @@ best_access_path(JOIN *join,
range->cost / s->quick->read_time >= 0.9999999));
filter=
- table->best_range_rowid_filter_for_partial_join(key_no, rows2double(range->rows),
+ table->best_range_rowid_filter_for_partial_join(key_no,
+ rows2double(range->rows),
range->find_cost,
range->index_only_cost,
record_count,
- &records_out);
+ &records_best_filter);
+ set_if_smaller(best.records_out, records_best_filter);
if (filter)
{
double filter_cost= range->fetch_cost;
@@ -8883,20 +8931,18 @@ best_access_path(JOIN *join,
{
type= JT_INDEX_MERGE;
}
- set_if_smaller(records_out, records_after_filter);
loose_scan_opt.check_range_access(join, idx, s->quick);
}
else
{
/* We will now calculate cost of scan, with or without join buffer */
- rnd_records= matching_candidates_in_table(s, found_constraint,
- use_cond_selectivity);
- records_after_filter= rnd_records;
- set_if_smaller(records_out, rnd_records);
+ records_after_filter= matching_candidates_in_table(s, 0,
+ use_cond_selectivity);
+ DBUG_ASSERT(records_after_filter <= s->records);
- org_records= rows2double(s->records);
+ set_if_smaller(best.records_out, records_after_filter);
- DBUG_ASSERT(rnd_records <= s->records);
+ org_records= rows2double(s->records);
/* Estimate cost of reading table. */
if (s->cached_forced_index_type)
@@ -8907,7 +8953,7 @@ best_access_path(JOIN *join,
}
else
{
- if (table->force_index_join && !best_key)
+ if (table->force_index_join && !best.key)
{
/*
The query is using 'forced_index' and we did not find a usable key.
@@ -8951,6 +8997,7 @@ best_access_path(JOIN *join,
tmp= s->cached_scan_and_compare_time;
type= JT_ALL;
}
+ /* Cache result for other calls */
s->cached_forced_index_type= type;
s->cached_forced_index_cost= tmp;
s->cached_forced_index= forced_index;
@@ -8977,7 +9024,7 @@ best_access_path(JOIN *join,
else
{
/* Scan trough join cache */
- double cmp_time, refills;
+ double cmp_time, row_copy_cost, refills;
/*
Calculate cost of checking the the WHERE for this table.
@@ -8995,13 +9042,16 @@ best_access_path(JOIN *join,
/* We come here only if there are already rows in the join cache */
DBUG_ASSERT(idx != join->const_tables);
/*
- Cost of moving each row from each previous table from the join cache
- to it's table record and comparing it with the found and accepted
- row.
+ Cost of:
+ - Copying all previous record combinations to the join cache
+ - Copying the tables from the join cache to table records
+ - Checking the WHERE against the final row combination
*/
- cmp_time= (rnd_records * record_count *
- (ROW_COPY_COST_THD(thd) * (idx - join->const_tables) *
- JOIN_CACHE_ROW_COPY_COST_FACTOR +
+ row_copy_cost= (ROW_COPY_COST_THD(thd) *
+ JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
+ cmp_time= (record_count * row_copy_cost +
+ records_after_filter * record_count *
+ ((idx - join->const_tables) * row_copy_cost +
WHERE_COST_THD(thd)));
tmp= COST_ADD(tmp, cmp_time);
}
@@ -9017,10 +9067,10 @@ best_access_path(JOIN *join,
trace_access_scan.
add("access_type",
type == JT_ALL ? scan_type : join_type_str[type]).
- add("rows", org_records).
- add("rows_after_scan", rnd_records).
- add("rows_after_filter", records_after_filter).
- add("cost", tmp);
+ add("rows", org_records).
+ add("rows_after_filter", records_after_filter).
+ add("rows_out", best.records_out).
+ add("cost", tmp);
if (type == JT_ALL)
{
trace_access_scan.add("index_only",
@@ -9028,27 +9078,38 @@ best_access_path(JOIN *join,
}
}
- if (tmp + COST_EPS < best_cost)
+ if (tmp + COST_EPS < best.cost)
{
/*
If the table has a range (s->quick is set) make_join_select()
will ensure that this will be used
*/
- best_cost= tmp;
- records= rnd_records;
- best_key= 0;
- best_forced_index= forced_index;
+ best.cost= tmp;
+ best.records_read= org_records; // Records accessed
+ best.records= records_after_filter; // Records to be checked with WHERE
+ /*
+ If we are using 'use_cond_selectivity > 1' then
+          table_after_join_selectivity() may take into account other
+          filters than the one currently used, so we have to use
+          records_after_filter. If 'use_cond_selectivity <= 1' then we
+ can use information from the best filter.
+ */
+ best.records_after_filter= ((use_cond_selectivity > 1) ?
+ records_after_filter :
+ records_best_filter);
+ best.key= 0;
+ best.forced_index= forced_index;
/*
filter is only set if
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE
*/
- best_filter= filter;
+ best.filter= filter;
/* range/index_merge/ALL/index access method are "independent", so: */
- best_ref_depends_map= 0;
- best_uses_jbuf= MY_TEST(!disable_jbuf && !((table->map &
+ best.ref_depends_map= 0;
+ best.uses_jbuf= MY_TEST(!disable_jbuf && !((table->map &
join->outer_join)));
- spl_plan= 0;
- best_type= type;
+ best.spl_plan= 0;
+ best.type= type;
trace_access_scan.add("chosen", true);
}
else
@@ -9063,29 +9124,33 @@ best_access_path(JOIN *join,
add("cause", "cost");
}
+ crash_if_first_double_is_bigger(best.records_out, best.records);
+ crash_if_first_double_is_bigger(best.records_out, best.records_read);
+
/* Update the cost information for the current partial plan */
- crash_if_first_double_is_bigger(records_out, records);
- pos->records_read= records;
- pos->records_out= records_out;
- pos->read_time= best_cost;
- pos->key= best_key;
- pos->forced_index= best_forced_index;
- pos->type= best_type;
+ pos->records_init= best.records_read;
+ pos->records_after_filter= best.records_after_filter;
+ pos->records_read= best.records;
+ pos->records_out= best.records_out;
+ pos->read_time= best.cost;
+ pos->key= best.key;
+ pos->forced_index= best.forced_index;
+ pos->type= best.type;
pos->table= s;
- pos->ref_depend_map= best_ref_depends_map;
+ pos->ref_depend_map= best.ref_depends_map;
pos->loosescan_picker.loosescan_key= MAX_KEY;
- pos->use_join_buffer= best_uses_jbuf;
- pos->spl_plan= spl_plan;
- pos->range_rowid_filter_info= best_filter;
- pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 :
+ pos->use_join_buffer= best.uses_jbuf;
+ pos->spl_plan= best.spl_plan;
+ pos->range_rowid_filter_info= best.filter;
+ pos->key_dependent= (best.type == JT_EQ_REF ? (table_map) 0 :
key_dependent & remaining_tables);
loose_scan_opt.save_to_position(s, loose_scan_pos);
- if (!best_key &&
- idx == join->const_tables &&
+ if (!best.key &&
+ idx == join->const_tables && // First table
table == join->sort_by_table &&
- join->unit->lim.get_select_limit() >= records)
+ join->unit->lim.get_select_limit() >= best.records) // QQQ Why?
{
trace_access_scan.add("use_tmp_table", true);
join->sort_by_table= (TABLE*) 1; // Must use temporary table
@@ -9320,15 +9385,6 @@ choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest)
DBUG_RETURN(TRUE);
}
- /*
- Store the cost of this query into a user variable
- Don't update last_query_cost for statements that are not "flat joins" :
- i.e. they have subqueries, unions or call stored procedures.
- TODO: calculate a correct cost for a query with subqueries and UNIONs.
- */
- if (join->thd->lex->is_single_level_stmt())
- join->thd->status_var.last_query_cost= join->best_read;
-
join->emb_sjm_nest= 0;
DBUG_RETURN(FALSE);
}
@@ -9595,6 +9651,8 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
{
POSITION *position= join->positions + idx;
Json_writer_object trace_one_table(thd);
+ double original_record_count, current_record_count;
+
if (unlikely(thd->trace_started()))
trace_plan_prefix(join, idx, remaining_tables);
/* Find the best access method from 's' to the current partial plan */
@@ -9603,22 +9661,71 @@ optimize_straight_join(JOIN *join, table_map remaining_tables)
position, &loose_scan_pos);
/* Compute the cost of the new plan extended with 's' */
- record_count= COST_MULT(record_count, position->records_read);
+ current_record_count= COST_MULT(record_count, position->records_out);
read_time= COST_ADD(read_time, position->read_time);
- optimize_semi_joins(join, remaining_tables, idx, &record_count, &read_time,
- &loose_scan_pos);
+ original_record_count= current_record_count;
+ optimize_semi_joins(join, remaining_tables, idx, &current_record_count,
+ &read_time, &loose_scan_pos);
+ if (position->sj_strategy != SJ_OPT_NONE && original_record_count)
+ {
+ /* Adjust records_out to contain the final number of rows */
+ double ratio= current_record_count / original_record_count;
+ /* QQQ This is just to stop an assert later */
+ if (ratio < 1)
+ position->records_out*= ratio;
+ }
+
remaining_tables&= ~(s->table->map);
- double pushdown_cond_selectivity= 1.0;
- if (use_cond_selectivity > 1)
+ if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
+ {
+ double pushdown_cond_selectivity, records_out;
pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s,
- remaining_tables);
- position->cond_selectivity= pushdown_cond_selectivity;
+ remaining_tables,
+ &records_out);
+ if (unlikely(thd->trace_started()) &&
+ pushdown_cond_selectivity != 1.0)
+ {
+ trace_one_table.
+ add("pushdown_cond_selectivity", pushdown_cond_selectivity).
+ add("rows_out", records_out);
+ }
+ position->cond_selectivity= pushdown_cond_selectivity;
+ position->records_out= records_out;
+ current_record_count= COST_MULT(record_count, records_out);
+ }
+ else
+ position->cond_selectivity= 1.0;
++idx;
+ record_count= current_record_count;
}
if (join->sort_by_table &&
join->sort_by_table != join->positions[join->const_tables].table->table)
- read_time+= record_count; // We have to make a temp table
+ {
+ /*
+      We may have to make a temp table; note that this is only a
+      heuristic since we cannot know for sure at this point if we
+      are going to use addon fields or have to flush the sort to
+      disk. We also don't know if the temporary table will be in memory
+      or on disk.
+      The following calculation takes a middle ground where we assume
+      we can sort the keys in memory but have to use a disk based
+      temporary table to retrieve the rows.
+ This cost is probably much bigger than it has to be...
+ */
+ double sort_cost;
+ sort_cost= (get_qsort_sort_cost((ha_rows)record_count, 0) +
+ record_count *
+ DISK_TEMPTABLE_LOOKUP_COST(thd));
+ {
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object trace_one_table(thd);
+ trace_one_table.add("estimated_cost_for_sorting", sort_cost);
+ }
+ }
+ read_time= COST_ADD(read_time, sort_cost);
+ }
memcpy((uchar*) join->best_positions, (uchar*) join->positions,
sizeof(POSITION)*idx);
join->join_record_count= record_count;
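The new sort-cost heuristic above combines a quicksort estimate with one disk
temp-table lookup per row. A rough stand-alone sketch; the n*log2(n) shape of
get_qsort_sort_cost() is an assumption here (the exact formula is defined in
the patched filesort_utils.cc), and the lookup cost is a stand-in built from
the generic defaults:

  #include <cmath>
  #include <cstdio>

  int main()
  {
    const double key_compare_cost=     0.000011361; /* DEFAULT_KEY_COMPARE_COST */
    const double disk_tmptable_lookup= 0.000627;    /* stand-in: key + row lookup + row copy */
    const double record_count= 100000;
    /* assumed shape: n * log2(n) key comparisons, plus one lookup per row */
    const double sort_cost=
      record_count * std::log2(record_count) * key_compare_cost +
      record_count * disk_tmptable_lookup;
    printf("estimated_cost_for_sorting ~= %.1f ms\n", sort_cost);
    return 0;
  }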
@@ -9997,8 +10104,7 @@ double JOIN::get_examined_rows()
COST_MULT((double) (tab->get_examined_rows()), prev_fanout));
prev_tab= tab;
}
- examined_rows= (double)
- (records > (double) HA_ROWS_MAX ? HA_ROWS_MAX : (ha_rows) records);
+ examined_rows= double_to_rows(records);
return examined_rows;
}
@@ -10129,9 +10235,10 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
@brief
Get the selectivity of conditions when joining a table
- @param join The optimized join
- @param s The table to be joined for evaluation
- @param rem_tables The bitmap of tables to be joined later
+ @param join The optimized join
+ @param s The table to be joined for evaluation
+ @param rem_tables The bitmap of tables to be joined later
+ @param new_records_out OUT Set to number of rows accepted
@detail
Get selectivity of conditions that can be applied when joining this table
@@ -10145,12 +10252,14 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
condition, "COND(this_table) AND COND(this_table, previous_tables)".
@retval
- selectivity of the conditions imposed on the rows of s
+    selectivity of the conditions imposed on the rows of s relative to
+ the rows that we are expected to read (position->records_init).
*/
static
double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
- table_map rem_tables)
+ table_map rem_tables,
+ double *new_records_out)
{
uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
uint ref_keyuse_size= MAX_REF_PARTS;
@@ -10158,13 +10267,14 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
Field *field;
TABLE *table= s->table;
MY_BITMAP *read_set= table->read_set;
- double sel= table->cond_selectivity;
POSITION *pos= &join->positions[idx];
+ double sel, records_out= pos->records_out;
uint keyparts= 0;
uint found_part_ref_or_null= 0;
if (pos->key != 0)
{
+ sel= table->cond_selectivity;
/*
A ref access or hash join is used for this table. ref access is created
from
@@ -10338,35 +10448,22 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
}
keyuse++;
}
- }
- else
- {
/*
- The table is accessed with full table scan, or quick select.
- Selectivity of COND(table) is already accounted for in
- matching_candidates_in_table().
- */
- sel= 1.0;
- }
+      If the field f from the table is equal to a field from one of the
+ earlier joined tables then the selectivity of the range conditions
+ over the field f must be discounted.
- /*
- If the field f from the table is equal to a field from one the
- earlier joined tables then the selectivity of the range conditions
- over the field f must be discounted.
-
- We need to discount selectivity only if we're using ref-based
- access method (and have sel!=1).
- If we use ALL/range/index_merge, then sel==1, and no need to discount.
- */
- if (pos->key != NULL)
- {
+ We need to discount selectivity only if we're using ref-based
+ access method (and have sel!=1).
+ If we use ALL/range/index_merge, then sel==1, and no need to discount.
+ */
for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
{
if (!bitmap_is_set(read_set, field->field_index) ||
!field->next_equal_field)
- continue;
- for (Field *next_field= field->next_equal_field;
- next_field != field;
+ continue;
+ for (Field *next_field= field->next_equal_field;
+ next_field != field;
next_field= next_field->next_equal_field)
{
if (!(next_field->table->map & rem_tables) &&
@@ -10381,14 +10478,39 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
}
}
}
+ /*
+ We have now calculated a more exact 'records_out' taking more index
+ costs into account.
+ pos->records_out previously contained the smallest record count for
+ all range or ref access, which should not be smaller than what we
+ calculated above.
+ */
+ records_out= pos->records_after_filter * sel;
+ set_if_smaller(records_out, pos->records_out);
}
- sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
+ sel= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
keyparts, ref_keyuse_steps);
+ records_out*= sel;
+
+ /*
+    Update sel to be relative to pos->records_read as that is what some old
+ code expects. Newer code should just use 'position->records_out' instead.
+ */
+ if (pos->records_read == 0)
+ sel= 1.0;
+ else
+ {
+ sel= records_out / pos->records_read;
+ DBUG_ASSERT(sel >= 0.0 and sel <= 1.00001);
+ if (sel > 1.0)
+ sel= 1.0;
+ }
+
exit:
+ *new_records_out= records_out;
if (ref_keyuse_steps != ref_keyuse_steps_buf)
my_free(ref_keyuse_steps);
- DBUG_ASSERT(sel >= 0.0 and sel <= 1.0);
return sel;
}
@@ -10407,7 +10529,7 @@ check_if_edge_table(POSITION *pos,
if ((pos->type == JT_EQ_REF ||
(pos->type == JT_REF &&
- pos->records_read == 1 &&
+ pos->records_init == 1 &&
!pos->range_rowid_filter_info)) &&
pushdown_cond_selectivity >= 0.999)
return SEARCH_FOUND_EDGE;
@@ -10600,7 +10722,7 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
// pplan_cost already too great, stop search
continue;
- pplan= expand pplan by best_access_method;
+ pplan= expand plan by best_access_method;
remaining_tables= remaining_tables - table T;
if (remaining_tables is not an empty set
and
@@ -10671,8 +10793,8 @@ best_extension_by_limited_search(JOIN *join,
{
THD *thd= join->thd;
/*
- 'join' is a partial plan with lower cost than the best plan so far,
- so continue expanding it further with the tables in 'remaining_tables'.
+ 'join' is a partial plan with lower cost than the best plan so far,
+ so continue expanding it further with the tables in 'remaining_tables'.
*/
JOIN_TAB *s;
double best_record_count= DBL_MAX;
@@ -10689,14 +10811,14 @@ best_extension_by_limited_search(JOIN *join,
if (dbug_user_var_equals_int(thd,
"show_explain_probe_select_id",
join->select_lex->select_number))
- dbug_serve_apcs(thd, 1);
- );
+ dbug_serve_apcs(thd, 1);
+ );
if (unlikely(thd->check_killed())) // Abort
DBUG_RETURN(SEARCH_ABORT);
DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
- "part_plan"););
+ "part_plan"););
status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls);
if (join->emb_sjm_nest)
@@ -10785,7 +10907,7 @@ best_extension_by_limited_search(JOIN *join,
!check_interleaving_with_nj(s))
{
table_map real_table_bit= s->table->map;
- double current_record_count, current_read_time;
+ double current_record_count, current_read_time, original_record_count;
double partial_join_cardinality;
POSITION *position= join->positions + idx, *loose_scan_pos;
double pushdown_cond_selectivity;
@@ -10802,7 +10924,7 @@ best_extension_by_limited_search(JOIN *join,
loose_scan_pos= pos->position+1;
/* Compute the cost of the new plan extended with 's' */
- current_record_count= COST_MULT(record_count, position->records_read);
+ current_record_count= COST_MULT(record_count, position->records_out);
current_read_time= COST_ADD(read_time, position->read_time);
if (unlikely(trace_one_table.trace_started()))
@@ -10811,9 +10933,22 @@ best_extension_by_limited_search(JOIN *join,
add("rows_for_plan", current_record_count).
add("cost_for_plan", current_read_time);
}
+ original_record_count= current_record_count;
optimize_semi_joins(join, remaining_tables, idx, &current_record_count,
&current_read_time, loose_scan_pos);
-
+ if (position->sj_strategy != SJ_OPT_NONE)
+ {
+ /* Adjust records_out and current_record_count after semi join */
+ double ratio= current_record_count / original_record_count;
+ /* QQQ This is just to stop an assert later */
+ if (ratio < 1.0)
+ position->records_out*= ratio;
+ if (unlikely(trace_one_table.trace_started()))
+ {
+ trace_one_table.add("sj_rows_out", position->records_out);
+ trace_one_table.add("sj_rows_for_plan", current_record_count);
+ }
+ }
/* Expand only partial plans with lower cost than the best QEP so far */
if (current_read_time + COST_EPS >= join->best_read)
{
@@ -10864,15 +10999,15 @@ best_extension_by_limited_search(JOIN *join,
if (best_record_count > current_record_count ||
best_read_time > current_read_time ||
(idx == join->const_tables && // 's' is the first table in the QEP
- s->table == join->sort_by_table))
+ s->table == join->sort_by_table))
{
/*
Store the current record count and cost as the best
possible cost at this level if the following holds:
- It's the lowest record number and cost so far
- - There is no remaing table that could improve index usage
- or we found an EQ_REF or REF key with less than 2
- matching records (good enough).
+ - There is no remaining table that could improve index usage
+ or we found an EQ_REF or REF key with less than 2
+ matching records (good enough).
*/
if (best_record_count >= current_record_count &&
best_read_time >= current_read_time &&
@@ -10924,17 +11059,26 @@ best_extension_by_limited_search(JOIN *join,
}
pushdown_cond_selectivity= 1.0;
- if (use_cond_selectivity > 1)
+ /*
+ TODO: When a semi-join strategy is applied (sj_strategy!=SJ_OPT_NONE),
+ we should account for selectivity from table_after_join_selectivity().
+ (Condition filtering is performed before the semi-join removes some
+ fanout so this might require moving the code around)
+ */
+ if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
+ {
pushdown_cond_selectivity=
table_after_join_selectivity(join, idx, s,
- remaining_tables & ~real_table_bit);
+ remaining_tables & ~real_table_bit,
+ &position->records_out);
+ }
join->positions[idx].cond_selectivity= pushdown_cond_selectivity;
- partial_join_cardinality= (current_record_count *
- pushdown_cond_selectivity);
+ partial_join_cardinality= record_count * position->records_out;
- if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0)
+ if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0 &&
+ partial_join_cardinality < current_record_count)
trace_one_table
.add("selectivity", pushdown_cond_selectivity)
.add("estimated_join_cardinality", partial_join_cardinality);
@@ -10979,11 +11123,21 @@ best_extension_by_limited_search(JOIN *join,
{
/*
We may have to make a temp table, note that this is only a
- heuristic since we cannot know for sure at this point.
- Hence it may be wrong.
+ heuristic since we cannot know for sure at this point whether we
+ are going to use addon fields or have to flush the sort to disk.
+ We also don't know whether the temporary table will be in memory
+ or on disk.
+ The following calculation takes a middle ground where we assume
+ we can sort the keys in memory but have to use a disk based
+ temporary table to retrieve the rows.
+ This cost is probably much bigger than it has to be...
*/
- trace_one_table.add("cost_for_sorting", current_record_count);
- current_read_time= COST_ADD(current_read_time, current_record_count);
+ double sort_cost;
+ sort_cost= (get_qsort_sort_cost((ha_rows)current_record_count,0) +
+ current_record_count *
+ DISK_TEMPTABLE_LOOKUP_COST(thd));
+ trace_one_table.add("cost_for_sorting", sort_cost);
+ current_read_time= COST_ADD(current_read_time, sort_cost);
}
if (current_read_time < join->best_read)
{
@@ -11318,11 +11472,8 @@ prev_record_reads(const POSITION *positions, uint idx, table_map found_ref)
is an imprecise estimate and adding 1 (or, in the worst case,
#max_nested_outer_joins=64-1) will not make it any more precise.
*/
- if (pos->records_read)
- {
- found= COST_MULT(found, pos->records_read);
- found*= pos->cond_selectivity;
- }
+ if (pos->records_out)
+ found= COST_MULT(found, pos->records_out);
}
}
return found;
@@ -11752,7 +11903,7 @@ bool JOIN::get_best_combination()
*/
SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0);
- j->records_out= j->records_read;
+ j->records_init= j->records_out= j->records_read;
j->records= (ha_rows) j->records_read;
j->cond_selectivity= 1.0;
JOIN_TAB *jt;
@@ -11787,6 +11938,7 @@ bool JOIN::get_best_combination()
if (j->type == JT_SYSTEM)
goto loop_end;
+
if (!(keyuse= cur_pos->key))
{
if (cur_pos->type == JT_NEXT) // Forced index
@@ -11807,17 +11959,19 @@ bool JOIN::get_best_combination()
j->range_rowid_filter_info=
cur_pos->range_rowid_filter_info;
- loop_end:
- /*
+ /*
Save records_read in JOIN_TAB so that select_describe()/etc don't have
to access join->best_positions[].
*/
+ j->records_init= cur_pos->records_init;
j->records_read= cur_pos->records_read;
j->records_out= cur_pos->records_out;
+
+ loop_end:
j->cond_selectivity= cur_pos->cond_selectivity;
DBUG_ASSERT(j->cond_selectivity <= 1.0);
crash_if_first_double_is_bigger(j->records_out,
- j->records_read *
+ j->records_init *
(j->range_rowid_filter_info ?
j->range_rowid_filter_info->selectivity :
1.0));
@@ -12580,7 +12734,10 @@ make_outerjoin_info(JOIN *join)
{
if (embedding->is_active_sjm())
{
- /* We're trying to walk out of an SJ-Materialization nest. Don't do this. */
+ /*
+ We're trying to walk out of an SJ-Materialization nest.
+ Don't do this.
+ */
break;
}
/* Ignore sj-nests: */
@@ -12861,8 +13018,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
tab->use_quick=1;
tab->ref.key= -1;
tab->ref.key_parts=0; // Don't use ref key.
- join->best_positions[i].records_read= rows2double(tab->quick->records);
- /*
+ join->best_positions[i].records_read=
+ join->best_positions[i].records_out=
+ rows2double(tab->quick->records);
+ /*
We will use join cache here : prevent sorting of the first
table only and sort at the end.
*/
@@ -14906,14 +15065,14 @@ void JOIN_TAB::cleanup()
/**
Estimate the time to get rows of the joined table
- Updates found_records, records, cached_scan_time, cached_covering_key,
- read_time and cache_scan_and_compare_time
+ Updates found_records, records, cached_covering_key, read_time and
+ cached_scan_and_compare_time
*/
void JOIN_TAB::estimate_scan_time()
{
THD *thd= join->thd;
- double copy_cost= ROW_COPY_COST_THD(thd);
+ double copy_cost;
cached_covering_key= MAX_KEY;
if (table->is_created())
@@ -14924,6 +15083,7 @@ void JOIN_TAB::estimate_scan_time()
&startup_cost);
table->opt_range_condition_rows= records;
table->used_stat_records= records;
+ copy_cost= table->file->ROW_COPY_COST;
}
else
{
@@ -14937,21 +15097,38 @@ void JOIN_TAB::estimate_scan_time()
if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
{
cached_covering_key= find_shortest_key(table, &table->covering_keys);
- read_time= table->file->ha_key_scan_time(cached_covering_key);
- copy_cost= KEY_COPY_COST_THD(thd);
+ read_time= table->file->ha_key_scan_time(cached_covering_key, records);
+ copy_cost= 0; // included in ha_key_scan_time
}
else
- read_time= table->file->ha_scan_time();
+ {
+ read_time= table->file->ha_scan_time(records);
+ copy_cost= 0;
+ }
}
}
else
{
+ /*
+ The following is the same as calling
+ TABLE_SHARE::update_optimizer_costs(), but without locks.
+ */
+ if (table->s->db_type() == heap_hton)
+ memcpy(&table->s->optimizer_costs, &heap_optimizer_costs,
+ sizeof(heap_optimizer_costs));
+ else
+ memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs,
+ sizeof(tmp_table_optimizer_costs));
+ table->file->set_optimizer_costs(thd);
+ table->s->optimizer_costs_inited= 1;
+
records= table->stat_records();
DBUG_ASSERT(table->opt_range_condition_rows == records);
- read_time= records ? (double) records: 10.0;// TODO:fix this stub
+ read_time= table->file->ha_scan_time(MY_MAX(records, 1000)); // Needs fix..
+ copy_cost= table->s->optimizer_costs.row_copy_cost;
}
+
found_records= records;
- cached_scan_time= read_time;
cached_scan_and_compare_time= (read_time + records *
(copy_cost + WHERE_COST_THD(thd)));
}
@@ -14996,7 +15173,7 @@ ha_rows JOIN_TAB::get_examined_rows()
}
}
else
- examined_rows= records_read;
+ examined_rows= records_init;
if (examined_rows >= (double) HA_ROWS_MAX)
return HA_ROWS_MAX;
@@ -18496,7 +18673,7 @@ table_map JOIN::get_allowed_nj_tables(uint idx)
first_alt TRUE <=> Use the LooseScan plan for the first_tab
no_jbuf_before Don't allow to use join buffering before this
table
- reopt_rec_count OUT New output record count
+ outer_rec_count OUT New output record count
reopt_cost OUT New join prefix cost
DESCRIPTION
@@ -18551,6 +18728,8 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables;
join->cur_sj_inner_tables= 0;
+ double inner_fanout= 1.0;
+
for (i= first_tab; i <= last_tab; i++)
{
JOIN_TAB *rs= join->positions[i].table;
@@ -18563,31 +18742,43 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
join->positions, i,
TRUE, rec_count,
&pos, &loose_scan_pos);
+ if ((i == first_tab && first_alt))
+ pos= loose_scan_pos;
}
else
pos= join->positions[i];
- if ((i == first_tab && first_alt))
- pos= loose_scan_pos;
-
reopt_remaining_tables &= ~rs->table->map;
- rec_count= COST_MULT(rec_count, pos.records_read);
cost= COST_ADD(cost, pos.read_time);
- //TODO: take into account join condition selectivity here
- double pushdown_cond_selectivity= 1.0;
- table_map real_table_bit= rs->table->map;
- if (join->thd->variables.optimizer_use_condition_selectivity > 1)
+
+ double records_out= pos.records_out;
+ /*
+ The (i != last_tab) is here to mimic what
+ best_extension_by_limited_search() does: do not call
+ table_after_join_selectivity() for the join_tab where the semi-join
+ strategy is applied
+ */
+ if (i != last_tab &&
+ join->thd->variables.optimizer_use_condition_selectivity > 1)
{
+ table_map real_table_bit= rs->table->map;
+ double __attribute__((unused)) pushdown_cond_selectivity;
pushdown_cond_selectivity=
table_after_join_selectivity(join, i, rs,
reopt_remaining_tables &
- ~real_table_bit);
+ ~real_table_bit, &records_out);
}
- (*outer_rec_count) *= pushdown_cond_selectivity;
- if (!rs->emb_sj_nest)
- *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read);
+ rec_count= COST_MULT(rec_count, records_out);
+ *outer_rec_count= COST_MULT(*outer_rec_count, records_out);
+ if (rs->emb_sj_nest)
+ inner_fanout= COST_MULT(inner_fanout, records_out);
}
+
+ /* Discount the fanout produced by the subquery */
+ if (inner_fanout > 1.0)
+ *outer_rec_count /= inner_fanout;
+
join->cur_sj_inner_tables= save_cur_sj_inner_tables;
*reopt_cost= cost;
@@ -20828,7 +21019,7 @@ TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM *param,
{
TABLE *table;
Create_tmp_table maker((ORDER *) NULL, false, false,
- select_options, HA_POS_ERROR);
+ select_options, HA_ROWS_MAX);
if (!(table= maker.start(thd, param, &table_alias)) ||
maker.add_schema_fields(thd, table, param, schema_table) ||
maker.finalize(thd, table, param, do_not_open, keep_row_order))
@@ -21008,7 +21199,6 @@ bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value)
return false;
}
-
bool open_tmp_table(TABLE *table)
{
int error;
@@ -21022,6 +21212,7 @@ bool open_tmp_table(TABLE *table)
}
table->db_stat= HA_OPEN_KEYFILE;
(void) table->file->extra(HA_EXTRA_QUICK); /* Faster */
+ table->file->set_optimizer_costs(table->in_use);
if (!table->is_created())
{
table->set_created();
@@ -24702,31 +24893,40 @@ ok:
@return
MAX_KEY no suitable key found
key index otherwise
+
+ @notes
+ We should not use keyread_time() as, in the case of disk_read_cost= 0,
+ all keys would be regarded as equal.
*/
uint find_shortest_key(TABLE *table, const key_map *usable_keys)
{
- double min_cost= DBL_MAX;
+ size_t min_length= INT_MAX32;
uint best= MAX_KEY;
- if (!usable_keys->is_clear_all())
+ uint possible_keys= usable_keys->bits_set();
+
+ if (possible_keys)
{
+ if (possible_keys == 1)
+ return usable_keys->find_first_bit();
+
for (uint nr=0; nr < table->s->keys ; nr++)
{
if (usable_keys->is_set(nr))
{
- double cost= table->file->ha_key_scan_time(nr);
- if (cost < min_cost)
+ size_t length= table->key_storage_length(nr);
+ if (length < min_length)
{
- min_cost= cost;
- best=nr;
+ min_length= length;
+ best= nr;
}
- DBUG_ASSERT(best < MAX_KEY);
}
}
}
return best;
}
+
/**
Test if a second key is the subkey of the first one.
@@ -28244,6 +28444,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
// psergey-todo: data for filtering!
tracker= &eta->tracker;
jbuf_tracker= &eta->jbuf_tracker;
+ jbuf_unpack_tracker= &eta->jbuf_unpack_tracker;
/* Enable the table access time tracker only for "ANALYZE stmt" */
if (thd->lex->analyze_stmt)
@@ -28472,12 +28673,13 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
ha_rows examined_rows= get_examined_rows();
eta->rows_set= true;
- eta->rows= examined_rows;
+ eta->rows= double_to_rows(examined_rows);
/* "filtered" */
float f= 0.0;
if (examined_rows)
{
+#ifdef OLD_CODE // QQQ
double pushdown_cond_selectivity= cond_selectivity;
if (pushdown_cond_selectivity != 1.0)
f= (float) (100.0 * pushdown_cond_selectivity);
@@ -28485,6 +28687,9 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
f= (float) (100.0 * range_rowid_filter_info->selectivity);
else
f= (float) (100.0 * records_read / examined_rows);
+#else
+ f= (float) (100.0 * records_out / examined_rows);
+#endif
}
set_if_smaller(f, 100.0);
eta->filtered_set= true;
@@ -28880,9 +29085,9 @@ int JOIN::save_explain_data_intern(Explain_query *output,
continue;
}
-
Explain_table_access *eta= (new (output->mem_root)
- Explain_table_access(output->mem_root));
+ Explain_table_access(output->mem_root,
+ thd->lex->analyze_stmt));
if (!eta)
DBUG_RETURN(1);
@@ -29922,7 +30127,7 @@ void JOIN::cache_const_exprs()
- If there is no quick select return the full cost from
cost_for_index_read() (Doing a full scan with up to 'limit' records)
- @param pos Result from best_acccess_path(). Is NULL for
+ @param pos Result from best_access_path(). Is NULL for
single-table UPDATE/DELETE
@param table Table to be sorted
@param keynr Which index to use
@@ -30008,7 +30213,7 @@ static bool get_range_limit_read_cost(const POSITION *pos,
/*
Calculate the number of rows we have to check if we are
- doing a full index scan (as a suitabe range scan was not available).
+ doing a full index scan (as a suitable range scan was not available).
We assume that each of the tested indexes is not correlated
with ref_key. Thus, to select first N records we have to scan
@@ -30197,12 +30402,12 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
trace_cheaper_ordering.add_table_name(tab);
else
trace_cheaper_ordering.add_table_name(table);
- trace_cheaper_ordering
- .add("rows_estimation", rows_estimate)
- .add("read_cost", read_time)
- .add("filesort_cost", filesort_cost)
- .add("filesort_type", filesort_names[filesort_type].str)
- .add("fanout", fanout);
+ trace_cheaper_ordering.
+ add("rows_estimation", rows_estimate).
+ add("filesort_cost", filesort_cost).
+ add("read_cost", read_time).
+ add("filesort_type", filesort_names[filesort_type].str).
+ add("fanout", fanout);
}
Json_writer_array possible_keys(thd,"possible_keys");
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 536f828ee1d..45bf4ba5fe8 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -309,6 +309,7 @@ typedef struct st_join_table {
Table_access_tracker *tracker;
Table_access_tracker *jbuf_tracker;
+ Time_and_counter_tracker *jbuf_unpack_tracker;
// READ_RECORD::Setup_func materialize_table;
READ_RECORD::Setup_func read_first_record;
@@ -341,6 +342,9 @@ typedef struct st_join_table {
*/
double read_time;
+ /* Copy of POSITION::records_init, set by get_best_combination() */
+ double records_init;
+
/* Copy of POSITION::records_read, set by get_best_combination() */
double records_read;
@@ -356,7 +360,6 @@ typedef struct st_join_table {
double partial_join_cardinality;
/* set by estimate_scan_time() */
- double cached_scan_time;
double cached_scan_and_compare_time;
double cached_forced_index_cost;
@@ -959,21 +962,44 @@ public:
/* The table that's put into join order */
JOIN_TAB *table;
+ /* number of rows that will be read from the table */
+ double records_init;
+
+ /*
+ Number of rows left after filtering, calculated in best_access_path().
+ If use_cond_selectivity > 1 it contains the rows left after the used
+ rowid filter (if one exists).
+ If use_cond_selectivity <= 1 it contains the minimum number of rows left
+ after any rowid filter, or records_init if no filter exists.
+ */
+ double records_after_filter;
+
/*
- The number of rows that will be read from the table
+ Number of expected rows before applying the full WHERE clause. This
+ includes the rowid filter and table->cond_selectivity if
+ use_cond_selectivity > 1. See matching_candidates_in_table().
+ Should normally not be used.
*/
double records_read;
/*
- The "fanout": number of output rows that will be produced (after
+ The number of rows after applying the WHERE clause.
+
+ Same as the "fanout": number of output rows that will be produced (after
pushed down selection condition is applied) per each row combination of
previous tables.
- This takes into account table->cond_selectivity, the WHERE clause
- related to this table calculated in
- calculate_cond_selectivity_for_table(), and the used rowid filter but
- does not take into account the WHERE clause involving preceding tables
- calculated in table_after_join_selectivity().
+ In best_access_path() it is set to the minimum number of accepted rows
+ for any possible access method or filter:
+
+ records_out takes into account table->cond_selectivity, the WHERE clause
+ related to this table calculated in calculate_cond_selectivity_for_table(),
+ and the used rowid filter.
+
+ After best_access_path(), records_out does not yet take into account
+ the part of the WHERE clause involving preceding tables.
+ records_out is updated in best_extension_by_limited_search() to take these
+ tables into account by calling table_after_join_selectivity().
*/
double records_out;
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 87f697c5ae9..d94bd127311 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -50,6 +50,7 @@
#include "authors.h"
#include "contributors.h"
#include "sql_partition.h"
+#include "optimizer_defaults.h"
#ifdef HAVE_EVENT_SCHEDULER
#include "events.h"
#include "event_data_objects.h"
@@ -3668,6 +3669,9 @@ const char* get_one_variable(THD *thd,
/* 6 is the default precision for '%f' in sprintf() */
end= buff + my_fcvt(*value.as_double, 6, buff, NULL);
break;
+ case SHOW_OPTIMIZER_COST: // Stored in ms, displayed in us
+ end= buff + my_fcvt(*value.as_double*1000, 6, buff, NULL);
+ break;
case SHOW_LONG_STATUS:
value.as_char= status_var_value.as_char + value.as_intptr;
/* fall through */
@@ -9188,6 +9192,49 @@ int fill_key_cache_tables(THD *thd, TABLE_LIST *tables, COND *cond)
}
+/* Ensure we return 'OPTIMIZER_COST_UNDEF' if cost < 0 */
+
+static double fix_cost(double cost)
+{
+ return cost < 0 ? OPTIMIZER_COST_UNDEF : cost;
+}
+
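+/*
+ Fill one row of INFORMATION_SCHEMA.OPTIMIZER_COSTS for one engine.
+ Costs are stored internally in milliseconds but displayed in microseconds,
+ hence the multiplication by 1000.0; disk_read_ratio is a plain ratio and
+ is stored as is.
+*/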
+static int run_fill_optimizer_costs_tables(const LEX_CSTRING *name,
+ const OPTIMIZER_COSTS *costs,
+ TABLE *table)
+{
+ THD *thd= table->in_use;
+ DBUG_ENTER("run_fill_optimizer_costs_tables");
+
+ restore_record(table, s->default_values);
+ table->field[0]->store(name->str, name->length, system_charset_info);
+ table->field[1]->store(fix_cost(costs->disk_read_cost*1000.0));
+ table->field[2]->store(fix_cost(costs->index_block_copy_cost*1000.0));
+ table->field[3]->store(fix_cost(costs->key_cmp_cost*1000.0));
+ table->field[4]->store(fix_cost(costs->key_copy_cost*1000.0));
+ table->field[5]->store(fix_cost(costs->key_lookup_cost*1000.0));
+ table->field[6]->store(fix_cost(costs->key_next_find_cost*1000.0));
+ table->field[7]->store(fix_cost(costs->disk_read_ratio));
+ table->field[8]->store(fix_cost(costs->row_copy_cost*1000.0));
+ table->field[9]->store(fix_cost(costs->row_lookup_cost*1000.0));
+ table->field[10]->store(fix_cost(costs->row_next_find_cost*1000.0));
+ table->field[11]->store(fix_cost(costs->rowid_cmp_cost*1000.0));
+ table->field[12]->store(fix_cost(costs->rowid_copy_cost*1000.0));
+
+ DBUG_RETURN(schema_table_store_record(thd, table));
+}
+
+
+int fill_optimizer_costs_tables(THD *thd, TABLE_LIST *tables, COND *cond)
+{
+ DBUG_ENTER("fill_optimizer_costs_tables");
+
+ int res= process_optimizer_costs(run_fill_optimizer_costs_tables,
+ tables->table);
+ DBUG_RETURN(res);
+}
+
+
namespace Show {
ST_FIELD_INFO schema_fields_info[]=
@@ -9816,6 +9863,25 @@ ST_FIELD_INFO keycache_fields_info[]=
};
+ST_FIELD_INFO optimizer_costs_fields_info[]=
+{
+ Column("ENGINE", Varchar(NAME_LEN),NOT_NULL),
+ Column("OPTIMIZER_DISK_READ_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_INDEX_BLOCK_COPY_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_KEY_COMPARE_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_KEY_COPY_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_KEY_LOOKUP_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_KEY_NEXT_FIND_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_DISK_READ_RATIO", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_ROW_COPY_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_ROW_LOOKUP_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_ROW_NEXT_FIND_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_ROWID_COMPARE_COST", Decimal(906), NOT_NULL),
+ Column("OPTIMIZER_ROWID_COPY_COST", Decimal(906), NOT_NULL),
+ CEnd()
+};
+
+
ST_FIELD_INFO show_explain_tabular_fields_info[]=
{
Column("id", SLonglong(3), NULLABLE, "id"),
@@ -9954,6 +10020,8 @@ ST_SCHEMA_TABLE schema_tables[]=
OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY},
{"OPEN_TABLES", Show::open_tables_fields_info, 0,
fill_open_tables, make_old_format, 0, -1, -1, 1, 0},
+ {"OPTIMIZER_COSTS", Show::optimizer_costs_fields_info, 0,
+ fill_optimizer_costs_tables, 0, 0, -1,-1, 0, 0},
{"OPTIMIZER_TRACE", Show::optimizer_trace_info, 0,
fill_optimizer_trace_info, NULL, NULL, -1, -1, false, 0},
{"PARAMETERS", Show::parameters_fields_info, 0,
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 0b09d52e217..d4fe31b25f1 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -8271,7 +8271,7 @@ assign_to_keycache_parts:
key_cache_name:
ident { $$= $1; }
- | DEFAULT { $$ = default_key_cache_base; }
+ | DEFAULT { $$ = default_base; }
;
preload:
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 894d2bede28..d5146026692 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -53,8 +53,9 @@
#include "debug_sync.h" // DEBUG_SYNC
#include "sql_show.h"
#include "opt_trace_context.h"
-
#include "log_event.h"
+#include "optimizer_defaults.h"
+
#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
#include "../storage/perfschema/pfs_server.h"
#endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */
@@ -6973,68 +6974,111 @@ static Sys_var_ulong Sys_optimizer_max_sel_arg_weight(
SESSION_VAR(optimizer_max_sel_arg_weight), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, ULONG_MAX), DEFAULT(SEL_ARG::MAX_WEIGHT), BLOCK_SIZE(1));
-/*
- We don't allow 100 for optimizer_cache_cost as there is always a small
- cost of finding the key, on cached pages, that we have to take into account.
-*/
-static bool update_optimizer_cache_hit_ratio(sys_var *self, THD *thd,
- enum_var_type type)
-{
- if (type == OPT_SESSION)
- thd->optimizer_cache_hit_ratio=
- cache_hit_ratio(thd->variables.optimizer_cache_hit_ratio);
- return 0;
-}
-
-static Sys_var_uint Sys_optimizer_cache_hit_ratio(
- "optimizer_cache_hit_ratio",
- "Expected hit rate of the row and index cache in storage engines. "
- "The value should be an integer between 0 and 99, where 0 means cache is "
- "empty and 99 means that value is almost always in the cache.",
- SESSION_VAR(optimizer_cache_hit_ratio), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(0, 99), DEFAULT(DEFAULT_CACHE_HIT_RATIO), 1, NO_MUTEX_GUARD,
- NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(update_optimizer_cache_hit_ratio));
-
-static Sys_var_double Sys_optimizer_key_copy_cost(
+static Sys_var_engine_optimizer_cost Sys_optimizer_disk_read_ratio(
+ "optimizer_disk_read_ratio",
+ "Chance that we have to do a disk read to find a row or index entry from "
+ "the engine cache (cache_misses/total_cache_requests). 0.0 means that "
+ "everything is cached and 1.0 means that nothing is expected to be in the "
+ "engine cache.",
+ COST_VAR(disk_read_ratio),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_DISK_READ_RATIO),
+ VALID_RANGE(0.0, 1.0), DEFAULT(DEFAULT_DISK_READ_RATIO), COST_ADJUST(1));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_key_lookup_cost(
+ "optimizer_key_lookup_cost",
+ "Cost for finding a key based on a key value",
+ COST_VAR(key_lookup_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_LOOKUP_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_KEY_LOOKUP_COST), COST_ADJUST(1000));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_row_lookup_cost(
+ "optimizer_row_lookup_cost",
+ "Cost of finding a row based on a rowid or a clustered key.",
+ COST_VAR(row_lookup_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROW_LOOKUP_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROW_LOOKUP_COST), COST_ADJUST(1000));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_disk_read_cost(
+ "optimizer_disk_read_cost",
+ "Cost of reading a block of IO_SIZE (4096) from a disk (in usec).",
+ COST_VAR(disk_read_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_DISK_READ_COST),
+ VALID_RANGE(0, 10000), DEFAULT(DEFAULT_DISK_READ_COST), COST_ADJUST(1000));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_key_copy_cost(
"optimizer_key_copy_cost",
- "Cost of finding the next key in the engine and copying it to the SQL layer.",
- SESSION_VAR(optimizer_key_copy_cost), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(0, 1), DEFAULT(DEFAULT_KEY_COPY_COST), NO_MUTEX_GUARD,
- NOT_IN_BINLOG);
+ "Cost of finding the next key in the engine and copying it to the SQL "
+ "layer.",
+ COST_VAR(key_copy_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_COPY_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_KEY_COPY_COST), COST_ADJUST(1000));
-static Sys_var_double Sys_optimizer_index_block_copy_cost(
+static Sys_var_engine_optimizer_cost Sys_optimizer_index_block_copy_cost(
"optimizer_index_block_copy_cost",
- "Cost of copying a key block from the cache to intern storage as part of an "
- "index scan.",
- SESSION_VAR(optimizer_index_block_copy_cost), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(0, 1), DEFAULT(DEFAULT_INDEX_BLOCK_COPY_COST), NO_MUTEX_GUARD,
- NOT_IN_BINLOG);
-
-static Sys_var_double Sys_optimizer_key_next_find_cost(
+ "Cost of copying a key block from the cache to intern storage as part of "
+ "an index scan.",
+ COST_VAR(index_block_copy_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_INDEX_BLOCK_COPY_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_INDEX_BLOCK_COPY_COST), COST_ADJUST(1000));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_row_next_find_cost(
+ "optimizer_row_next_find_cost",
+ "Cost of finding the next row when scanning the table.",
+ COST_VAR(row_next_find_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROW_NEXT_FIND_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROW_NEXT_FIND_COST), COST_ADJUST(1000));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_key_next_find_cost(
"optimizer_key_next_find_cost",
"Cost of finding the next key and rowid when using filters.",
- SESSION_VAR(optimizer_key_next_find_cost), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(0, 1), DEFAULT(DEFAULT_KEY_NEXT_FIND_COST), NO_MUTEX_GUARD,
- NOT_IN_BINLOG);
+ COST_VAR(key_next_find_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_NEXT_FIND_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_KEY_NEXT_FIND_COST), COST_ADJUST(1000));
-static Sys_var_double Sys_optimizer_row_copy_cost(
+static Sys_var_engine_optimizer_cost Sys_optimizer_row_copy_cost(
"optimizer_row_copy_cost",
"Cost of copying a row from the engine or the join cache to the SQL layer.",
- SESSION_VAR(optimizer_row_copy_cost), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(0, 1), DEFAULT(DEFAULT_ROW_COPY_COST), NO_MUTEX_GUARD,
- NOT_IN_BINLOG);
+ COST_VAR(row_copy_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROW_COPY_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROW_COPY_COST), COST_ADJUST(1000));
-static Sys_var_double Sys_optimizer_where_cost(
- "optimizer_where_cost",
- "Cost of checking the row against the WHERE clause.",
- SESSION_VAR(optimizer_where_cost), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(0, 1), DEFAULT(DEFAULT_WHERE_COST), NO_MUTEX_GUARD,
- NOT_IN_BINLOG);
-
-static Sys_var_double Sys_optimizer_key_cmp_cost(
+static Sys_var_engine_optimizer_cost Sys_optimizer_key_cmp_cost(
"optimizer_key_compare_cost",
"Cost of checking a key against the end key condition.",
- SESSION_VAR(optimizer_key_cmp_cost), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(0, 1), DEFAULT(DEFAULT_KEY_COMPARE_COST), NO_MUTEX_GUARD,
- NOT_IN_BINLOG);
+ COST_VAR(key_cmp_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_CMP_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_KEY_COMPARE_COST), COST_ADJUST(1000));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_rowid_cmp_cost(
+ "optimizer_rowid_compare_cost",
+ "Cost of comparing two rowid's",
+ COST_VAR(rowid_cmp_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROWID_CMP_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROWID_COMPARE_COST), COST_ADJUST(1000));
+
+static Sys_var_engine_optimizer_cost Sys_optimizer_rowid_copy_cost(
+ "optimizer_rowid_copy_cost",
+ "Cost of copying a rowid",
+ COST_VAR(rowid_copy_cost),
+ CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROWID_COPY_COST),
+ VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROWID_COPY_COST), COST_ADJUST(1000));
+
+/* The following costs are stored in THD and handler */
+
+static Sys_var_optimizer_cost Sys_optimizer_where_cost(
+ "optimizer_where_cost",
+ "Cost of checking the row against the WHERE clause. Increasing this will "
+ "have the optimizer to prefer plans with less row combinations.",
+ SESSION_VAR(optimizer_where_cost),
+ CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 100000), DEFAULT(DEFAULT_WHERE_COST), COST_ADJUST(1000));
+
+static Sys_var_optimizer_cost Sys_optimizer_scan_cost(
+ "optimizer_scan_setup_cost",
+ "Extra cost added to TABLE and INDEX scans to get optimizer to prefer "
+ "index lookups.",
+ SESSION_VAR(optimizer_scan_setup_cost),
+ CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 100000000), DEFAULT(DEFAULT_TABLE_SCAN_SETUP_COST),
+ COST_ADJUST(1000));
diff --git a/sql/sys_vars.inl b/sql/sys_vars.inl
index b1d7bc31255..5997446a61e 100644
--- a/sql/sys_vars.inl
+++ b/sql/sys_vars.inl
@@ -32,6 +32,7 @@
#include "rpl_mi.h" // For Multi-Source Replication
#include "debug_sync.h"
#include "sql_acl.h" // check_global_access()
+#include "optimizer_defaults.h" // create_optimizer_costs
/*
a set of mostly trivial (as in f(X)=X) defines below to make system variable
@@ -40,6 +41,7 @@
#define VALID_RANGE(X,Y) X,Y
#define DEFAULT(X) X
#define BLOCK_SIZE(X) X
+#define COST_ADJUST(X) X
#define GLOBAL_VAR(X) sys_var::GLOBAL, (((char*)&(X))-(char*)&global_system_variables), sizeof(X)
#define SESSION_VAR(X) sys_var::SESSION, offsetof(SV, X), sizeof(((SV *)0)->X)
#define SESSION_ONLY(X) sys_var::ONLY_SESSION, offsetof(SV, X), sizeof(((SV *)0)->X)
@@ -1048,7 +1050,7 @@ public:
/* If no basename, assume it's for the key cache named 'default' */
if (!base_name->length)
- base_name= &default_key_cache_base;
+ base_name= &default_base;
key_cache= get_key_cache(base_name);
@@ -1228,6 +1230,143 @@ public:
{ var->save_result.double_value= getopt_ulonglong2double(option.def_value); }
};
+
+/*
+ Optimizer costs
+ Stored as cost factor (1 cost = 1 ms).
+ Given and displayed as microseconds (as most values are very small)
+*/
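+
+/*
+ Example of the round trip (the value 32 is chosen only for illustration):
+   SET GLOBAL optimizer_where_cost= 32;   -- given in microseconds
+ stores 32/1000 = 0.032 (milliseconds) internally, and
+   SELECT @@optimizer_where_cost;         -- shows 32 again
+ as SHOW_OPTIMIZER_COST multiplies the stored value by 1000 for display.
+*/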
+
+class Sys_var_optimizer_cost: public Sys_var_double
+{
+public:
+ double cost_adjust;
+ Sys_var_optimizer_cost(const char *name_arg,
+ const char *comment, int flag_args, ptrdiff_t off, size_t size,
+ CMD_LINE getopt,
+ double min_val, double max_val, double def_val,
+ ulong arg_cost_adjust, PolyLock *lock=0,
+ enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG,
+ on_check_function on_check_func=0,
+ on_update_function on_update_func=0,
+ const char *substitute=0)
+ :Sys_var_double(name_arg, comment, flag_args, off, size, getopt,
+ min_val, max_val, def_val, lock,
+ binlog_status_arg,
+ on_check_func,
+ on_update_func,
+ substitute)
+ {
+ if (arg_cost_adjust == 1000)
+ {
+ show_val_type= SHOW_OPTIMIZER_COST; // For select @@var
+ option.var_type|= GET_ADJUST_VALUE;
+ }
+ cost_adjust= (double) arg_cost_adjust;
+ global_var(double)= (double)option.def_value/cost_adjust; // Convert to ms
+ }
+ bool session_update(THD *thd, set_var *var)
+ {
+ session_var(thd, double)= var->save_result.double_value/cost_adjust;
+ return false;
+ }
+ bool global_update(THD *thd, set_var *var)
+ {
+ global_var(double)= var->save_result.double_value/cost_adjust;
+ return false;
+ }
+ void session_save_default(THD *thd, set_var *var)
+ { var->save_result.double_value= global_var(double) * cost_adjust; }
+
+ void global_save_default(THD *thd, set_var *var)
+ {
+ var->save_result.double_value= getopt_ulonglong2double(option.def_value)*
+ cost_adjust;
+ }
+};
+
+
+/*
+ The class for optimizer costs with structured names, unique for each engine.
+ Used as 'engine.variable_name'
+
+ Class specific constructor arguments:
+ everything derived from Sys_var_optimizer_cost
+
+ Backing store: double
+
+ @note these variables can be only GLOBAL
+*/
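+
+/*
+ Usage sketch (the engine name below is only an example):
+   SET GLOBAL innodb.optimizer_disk_read_cost= 8;  -- cost for one engine
+   SET GLOBAL optimizer_disk_read_cost= 10;        -- cost for the 'default' set
+ Reading a cost for an engine that has no own entry falls back to
+ default_optimizer_costs.
+*/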
+
+#define COST_VAR(X) GLOBAL_VAR(default_optimizer_costs.X)
+#define cost_var_ptr(KC, OFF) (((uchar*)(KC))+(OFF))
+#define cost_var(KC, OFF) (*(double*)cost_var_ptr(KC, OFF))
+typedef bool (*cost_update_function)(THD *, OPTIMIZER_COSTS *, ptrdiff_t,
+ double, double);
+
+static bool update_cost(THD *thd, OPTIMIZER_COSTS *costs,
+ ptrdiff_t offset, double new_value, double cost_adjust)
+{
+ cost_var(costs, offset)= new_value / cost_adjust;
+ return 0;
+}
+
+
+class Sys_var_engine_optimizer_cost: public Sys_var_optimizer_cost
+{
+ cost_update_function cost_update;
+ public:
+ Sys_var_engine_optimizer_cost(const char *name_arg,
+ const char *comment, int flag_args, ptrdiff_t off, size_t size,
+ CMD_LINE getopt,
+ double min_val, double max_val, double def_val,
+ long cost_adjust, PolyLock *lock= 0,
+ cost_update_function on_update_func= update_cost,
+ const char *substitute=0)
+ : Sys_var_optimizer_cost(name_arg, comment, flag_args, off, size,
+ getopt, min_val, max_val, def_val, cost_adjust,
+ lock, VARIABLE_NOT_IN_BINLOG, 0,
+ 0, substitute),
+ cost_update(on_update_func)
+ {
+ option.var_type|= GET_ASK_ADDR;
+ option.value= (uchar**)1; // crash me, please
+ // fix an offset from global_system_variables to be an offset in OPTIMIZER_COSTS
+ offset= global_var_ptr() - (uchar*) &default_optimizer_costs;
+ SYSVAR_ASSERT(scope() == GLOBAL);
+ }
+ bool global_update(THD *thd, set_var *var)
+ {
+ double new_value= var->save_result.double_value;
+ LEX_CSTRING *base_name= &var->base;
+ OPTIMIZER_COSTS *optimizer_costs;
+ bool res;
+
+ /* If no basename, assume it's for the default costs */
+ if (!base_name->length)
+ base_name= &default_base;
+
+ mysql_mutex_lock(&LOCK_optimizer_costs);
+ if (!(optimizer_costs= get_or_create_optimizer_costs(base_name->str,
+ base_name->length)))
+ {
+ mysql_mutex_unlock(&LOCK_optimizer_costs);
+ return true;
+ }
+ res= cost_update(thd, optimizer_costs, offset, new_value, cost_adjust);
+ mysql_mutex_unlock(&LOCK_optimizer_costs);
+ return res;
+ }
+ const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const
+ {
+ OPTIMIZER_COSTS *optimizer_costs= get_optimizer_costs(base);
+ if (!optimizer_costs)
+ optimizer_costs= &default_optimizer_costs;
+ return cost_var_ptr(optimizer_costs, offset);
+ }
+};
+
+
/**
The class for the @max_user_connections.
It's derived from Sys_var_uint, but non-standard session value
diff --git a/sql/table.cc b/sql/table.cc
index 5b84cd46152..19a37e0d4d2 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -2290,7 +2290,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
share->keynames.count != keys))
goto err;
- /* Allocate handler */
+ /* Allocate handler */
if (!(handler_file= get_new_handler(share, thd->mem_root,
plugin_hton(se_plugin))))
goto err;
@@ -2788,6 +2788,8 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
DBUG_ASSERT((null_pos + (null_bit_pos + 7) / 8) <= share->field[0]->ptr);
}
+ share->primary_key= MAX_KEY;
+
/* Fix key->name and key_part->field */
if (key_parts)
{
@@ -2919,6 +2921,11 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
}
}
+ /* Primary key must be set early as engine may use it in index_flags() */
+ share->primary_key= (primary_key < MAX_KEY &&
+ share->keys_in_use.is_set(primary_key) ?
+ primary_key : MAX_KEY);
+
key_first_info= keyinfo;
for (uint key=0 ; key < keys ; key++,keyinfo++)
{
@@ -3161,7 +3168,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
if (primary_key < MAX_KEY &&
(share->keys_in_use.is_set(primary_key)))
{
- share->primary_key= primary_key;
+ DBUG_ASSERT(share->primary_key == primary_key);
/*
If we are using an integer as the primary key then allow the user to
refer to it as '_rowid'
@@ -3178,10 +3185,10 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
}
}
else
- share->primary_key = MAX_KEY; // we do not have a primary key
+ {
+ DBUG_ASSERT(share->primary_key == MAX_KEY);
+ }
}
- else
- share->primary_key= MAX_KEY;
if (new_field_pack_flag <= 1)
{
/* Old file format with default as not null */
@@ -3407,6 +3414,27 @@ err:
}
+/*
+ Make a copy of optimizer costs to be able to access these without any locks
+ and to allow the engine to update costs.
+*/
+
+void TABLE_SHARE::update_optimizer_costs(handlerton *hton)
+{
+ if (hton != view_pseudo_hton && !(hton->flags & HTON_HIDDEN))
+ {
+ mysql_mutex_lock(&LOCK_optimizer_costs);
+ memcpy(&optimizer_costs, hton->optimizer_costs, sizeof(optimizer_costs));
+ mysql_mutex_unlock(&LOCK_optimizer_costs);
+ }
+ else
+ {
+ bzero(&optimizer_costs, sizeof(optimizer_costs));
+ MEM_UNDEFINED(&optimizer_costs, sizeof(optimizer_costs));
+ }
+}
+
+
static bool sql_unusable_for_discovery(THD *thd, handlerton *engine,
const char *sql)
{
@@ -5657,7 +5685,6 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
no_cache= false;
initialize_opt_range_structures();
-
/*
Update optimizer_costs to ensure that a SET STATEMENT of the
variables will work.
@@ -10418,10 +10445,10 @@ inline void TABLE::initialize_opt_range_structures()
}
-double TABLE::OPT_RANGE::index_only_fetch_cost(THD *thd)
+double TABLE::OPT_RANGE::index_only_fetch_cost(TABLE *table)
{
- return (index_only_cost + (double) rows *
- thd->variables.optimizer_key_copy_cost);
+ return (index_only_cost +
+ (double) rows * table->s->optimizer_costs.key_copy_cost);
}
diff --git a/sql/table.h b/sql/table.h
index aa4b5c9a8fd..34514186b5a 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -813,6 +813,7 @@ struct TABLE_SHARE
return is_view ? view_pseudo_hton :
db_plugin ? plugin_hton(db_plugin) : NULL;
}
+ OPTIMIZER_COSTS optimizer_costs; /* Copy of get_optimizer_costs() */
enum row_type row_type; /* How rows are stored */
enum Table_type table_type;
enum tmp_table_type tmp_table;
@@ -888,6 +889,7 @@ struct TABLE_SHARE
bool has_update_default_function;
bool can_do_row_logging; /* 1 if table supports RBR */
bool long_unique_table;
+ bool optimizer_costs_inited;
ulong table_map_id; /* for row-based replication */
@@ -1194,6 +1196,7 @@ struct TABLE_SHARE
void set_overlapped_keys();
void set_ignored_indexes();
key_map usable_indexes(THD *thd);
+ void update_optimizer_costs(handlerton *hton);
};
/* not NULL, but cannot be dereferenced */
@@ -1420,7 +1423,7 @@ public:
Cost of fetching keys with index only read and returning them to the
sql level.
*/
- double index_only_fetch_cost(THD *thd);
+ double index_only_fetch_cost(TABLE *table);
} *opt_range;
/*
Bitmaps of key parts that =const for the duration of join execution. If
@@ -1736,6 +1739,12 @@ public:
uint actual_n_key_parts(KEY *keyinfo);
ulong actual_key_flags(KEY *keyinfo);
int update_virtual_field(Field *vf, bool ignore_warnings);
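+ /*
+ Approximate number of bytes stored per entry in the given index.
+ For a clustering key the whole row lives in the index, so use the
+ stored row length; otherwise use the key length plus the appended
+ rowid. Used by find_shortest_key() to pick the narrowest covering index.
+ */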
+ inline size_t key_storage_length(uint index)
+ {
+ if (file->is_clustering_key(index))
+ return s->stored_rec_length;
+ return key_info[index].key_length + file->ref_length;
+ }
int update_virtual_fields(handler *h, enum_vcol_update_mode update_mode);
int update_default_fields(bool ignore_errors);
void evaluate_update_default_function();
diff --git a/sql/uniques.cc b/sql/uniques.cc
index a09655bcaca..8555fc21624 100644
--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -159,7 +159,7 @@ inline double log2_n_fact(double x)
total_buf_elems* log2(n_buffers) * ROWID_COMPARE_COST;
*/
-static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
+static double get_merge_buffers_cost(THD *thd, uint *buff_elems, uint elem_size,
uint *first, uint *last,
double compare_factor)
{
@@ -171,7 +171,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
size_t n_buffers= last - first + 1;
/* Using log2(n)=log(n)/log(2) formula */
- return (2*((double)total_buf_elems*elem_size) / IO_SIZE +
+ return (2*((double)total_buf_elems*elem_size) / IO_SIZE *
+ default_optimizer_costs.disk_read_cost +
total_buf_elems*log((double) n_buffers) * compare_factor / M_LN2);
}
@@ -185,6 +186,7 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
SYNOPSIS
get_merge_many_buffs_cost()
+ thd THD, used to get disk_read_cost
buffer buffer space for temporary data, at least
Unique::get_cost_calc_buff_size bytes
maxbuffer # of full buffers
@@ -203,7 +205,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
Cost of merge in disk seeks.
*/
-static double get_merge_many_buffs_cost(uint *buffer,
+static double get_merge_many_buffs_cost(THD *thd,
+ uint *buffer,
uint maxbuffer, uint max_n_elems,
uint last_n_elems, int elem_size,
double compare_factor)
@@ -231,13 +234,13 @@ static double get_merge_many_buffs_cost(uint *buffer,
uint lastbuff= 0;
for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
{
- total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
+ total_cost+=get_merge_buffers_cost(thd, buff_elems, elem_size,
buff_elems + i,
buff_elems + i + MERGEBUFF-1,
compare_factor);
lastbuff++;
}
- total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
+ total_cost+=get_merge_buffers_cost(thd, buff_elems, elem_size,
buff_elems + i,
buff_elems + maxbuffer,
compare_factor);
@@ -246,7 +249,7 @@ static double get_merge_many_buffs_cost(uint *buffer,
}
/* Simulate final merge_buff call. */
- total_cost += get_merge_buffers_cost(buff_elems, elem_size,
+ total_cost += get_merge_buffers_cost(thd, buff_elems, elem_size,
buff_elems, buff_elems + maxbuffer,
compare_factor);
return total_cost;
@@ -304,7 +307,7 @@ static double get_merge_many_buffs_cost(uint *buffer,
these will be random seeks.
*/
-double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size,
+double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
size_t max_in_memory_size,
double compare_factor,
bool intersect_fl, bool *in_memory)
@@ -312,7 +315,7 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size,
size_t max_elements_in_tree;
size_t last_tree_elems;
size_t n_full_trees; /* number of trees in unique - 1 */
- double result;
+ double result, disk_read_cost;
max_elements_in_tree= ((size_t) max_in_memory_size /
ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
@@ -345,14 +348,15 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size,
First, add cost of writing all trees to disk, assuming that all disk
writes are sequential.
*/
- result += DISK_SEEK_BASE_COST * n_full_trees *
- ceil(((double) key_size)*max_elements_in_tree / IO_SIZE);
- result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE);
+ disk_read_cost= DISK_READ_COST_THD(thd);
+ result += disk_read_cost * n_full_trees *
+ ceil(((double) key_size)*max_elements_in_tree / DISK_CHUNK_SIZE);
+ result += disk_read_cost * ceil(((double) key_size)*last_tree_elems / DISK_CHUNK_SIZE);
/* Cost of merge */
if (intersect_fl)
key_size+= sizeof(element_count);
- double merge_cost= get_merge_many_buffs_cost(buffer, (uint)n_full_trees,
+ double merge_cost= get_merge_many_buffs_cost(thd, buffer, (uint)n_full_trees,
(uint)max_elements_in_tree,
(uint)last_tree_elems, key_size,
compare_factor);
@@ -361,7 +365,8 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size,
Add cost of reading the resulting sequence, assuming there were no
duplicate elements.
*/
- result += ceil((double)key_size*nkeys/IO_SIZE);
+ result+= (ceil((double)key_size*nkeys/IO_SIZE) *
+ default_optimizer_costs.disk_read_cost);
return result;
}
diff --git a/sql/uniques.h b/sql/uniques.h
index f4c45cde095..ecc49794efe 100644
--- a/sql/uniques.h
+++ b/sql/uniques.h
@@ -78,7 +78,7 @@ public:
return log((double) tree_elems) * compare_factor / M_LN2;
}
- static double get_use_cost(uint *buffer, size_t nkeys, uint key_size,
+ static double get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
size_t max_in_memory_size, double compare_factor,
bool intersect_fl, bool *in_memory);
inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 19a0ffe028a..2a8deb431b1 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -132,7 +132,8 @@ extern "C" PSI_file_key arch_key_file_data;
static handler *archive_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root);
-int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share);
+static int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share);
+static void archive_update_optimizer_costs(OPTIMIZER_COSTS *costs);
/*
Number of rows that will force a bulk insert.
@@ -205,6 +206,7 @@ static const char *ha_archive_exts[] = {
NullS
};
+
int archive_db_init(void *p)
{
DBUG_ENTER("archive_db_init");
@@ -217,10 +219,10 @@ int archive_db_init(void *p)
archive_hton= (handlerton *)p;
archive_hton->db_type= DB_TYPE_ARCHIVE_DB;
archive_hton->create= archive_create_handler;
- archive_hton->flags= HTON_NO_FLAGS;
archive_hton->discover_table= archive_discover;
archive_hton->tablefile_extensions= ha_archive_exts;
-
+ archive_hton->update_optimizer_costs= archive_update_optimizer_costs;
+ archive_hton->flags= HTON_NO_FLAGS;
DBUG_RETURN(0);
}
@@ -267,7 +269,7 @@ ha_archive::ha_archive(handlerton *hton, TABLE_SHARE *table_arg)
archive_reader_open= FALSE;
}
-int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share)
+static int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share)
{
DBUG_ENTER("archive_discover");
DBUG_PRINT("archive_discover", ("db: '%s' name: '%s'", share->db.str,
@@ -1092,6 +1094,54 @@ int ha_archive::index_init(uint keynr, bool sorted)
DBUG_RETURN(0);
}
+#define ARCHIVE_DECOMPRESS_TIME 0.081034543792841 // See optimizer_costs.txt
+
+static void archive_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ costs->disk_read_ratio= 0.20; // Assume 80 % of data is cached by system
+ costs->row_lookup_cost= 0; // See rnd_pos_time
+ costs->key_lookup_cost= 0; // See key_read_time
+ costs->key_next_find_cost= 0; // Only unique indexes
+ costs->index_block_copy_cost= 0;
+}
+
+
+IO_AND_CPU_COST ha_archive::scan_time()
+{
+ IO_AND_CPU_COST cost;
+ ulonglong blocks;
+ DBUG_ENTER("ha_archive::scan_time");
+
+ blocks= stats.data_file_length / IO_SIZE;
+ cost.io= 0; // No cache
+ cost.cpu= (blocks * DISK_READ_COST * DISK_READ_RATIO +
+ blocks* ARCHIVE_DECOMPRESS_TIME);
+ DBUG_RETURN(cost);
+}
+
+
+IO_AND_CPU_COST ha_archive::keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+{
+ IO_AND_CPU_COST cost= scan_time();
+ /*
+ As this is a unique index, assume that we have to scan half the file for
+ each range to find the row.
+ */
+ cost.cpu= cost.cpu * ranges / 2;
+ return cost;
+}
+
+
+IO_AND_CPU_COST ha_archive::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost;
+ /* We have to do one azseek() for each row */
+ cost.io= rows2double(rows);
+ cost.cpu= rows * (DISK_READ_COST * DISK_READ_RATIO + ARCHIVE_DECOMPRESS_TIME);
+ return cost;
+}
+
/*
No indexes, so if we get a request for an index search since we tell
@@ -1116,8 +1166,6 @@ int ha_archive::index_read_idx(uchar *buf, uint index, const uchar *key,
current_k_offset= mkey->key_part->offset;
current_key= key;
current_key_len= key_len;
-
-
DBUG_ENTER("ha_archive::index_read_idx");
rc= rnd_init(TRUE);
diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h
index 2bb5079868b..c96f5d8d122 100644
--- a/storage/archive/ha_archive.h
+++ b/storage/archive/ha_archive.h
@@ -111,6 +111,10 @@ public:
uint max_supported_key_length() const { return sizeof(ulonglong); }
uint max_supported_key_part_length() const { return sizeof(ulonglong); }
ha_rows records() { return share->rows_recorded; }
+ IO_AND_CPU_COST scan_time() override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
int index_init(uint keynr, bool sorted);
virtual int index_read(uchar * buf, const uchar * key,
uint key_len, enum ha_rkey_function find_flag);
diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h
index 71ceb7974ba..ca3b69bb552 100644
--- a/storage/connect/ha_connect.h
+++ b/storage/connect/ha_connect.h
@@ -308,13 +308,18 @@ public:
/** @brief
Called in test_quick_select to determine if indexes should be used.
*/
- virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; }
+ virtual IO_AND_CPU_COST scan_time()
+ { return { 0, (double) (stats.records+stats.deleted) * avg_io_cost() }; };
/** @brief
This method will never be called if you do not implement indexes.
*/
- virtual double read_time(uint, uint, ha_rows rows)
- { return (double) rows / 20.0+1; }
+ virtual IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+ {
+ return { 0, (double) rows * 0.001 };
+ }
+
/*
Everything below are methods that we implement in ha_connect.cc.
diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h
index 043183444da..5a56dc6c4dd 100644
--- a/storage/csv/ha_tina.h
+++ b/storage/csv/ha_tina.h
@@ -124,7 +124,12 @@ public:
/*
Called in test_quick_select to determine if indexes should be used.
*/
- virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; }
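+ /*
+ io: number of IO_SIZE blocks in the data file that have to be read;
+ cpu: cost of stepping over every (also deleted) row in the CSV file.
+ */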
+ virtual IO_AND_CPU_COST scan_time()
+ {
+ return { (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE *
+ avg_io_cost(),
+ (stats.records+stats.deleted) * ROW_NEXT_FIND_COST };
+ }
/* The next method will never be called */
virtual bool fast_key_read() { return 1;}
/*
diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h
index 2d3fa6d4216..3b11945b182 100644
--- a/storage/example/ha_example.h
+++ b/storage/example/ha_example.h
@@ -150,15 +150,40 @@ public:
uint max_supported_key_length() const { return 0; }
/** @brief
- Called in test_quick_select to determine if indexes should be used.
+ Called in test_quick_select to determine cost of table scan
*/
- virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; }
+ virtual IO_AND_CPU_COST scan_time()
+ {
+ IO_AND_CPU_COST cost;
+ /* 0 blocks, 0.001 ms / row */
+ cost.io= (double) (stats.records+stats.deleted) * avg_io_cost();
+ cost.cpu= 0;
+ return cost;
+ }
/** @brief
This method will never be called if you do not implement indexes.
*/
- virtual double read_time(uint, uint, ha_rows rows)
- { return (double) rows / 20.0+1; }
+ virtual IO_AND_CPU_COST keyread_time(uint, ulong, ha_rows rows,
+ ulonglong blocks)
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= blocks * avg_io_cost();
+ cost.cpu= (double) rows * 0.001;
+ return cost;
+ }
+
+ /** @brief
+ Cost of fetching 'rows' records through rnd_pos()
+ */
+ virtual IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+ {
+ IO_AND_CPU_COST cost;
+ /* 0 blocks, 0.001 ms / row */
+ cost.io= 0;
+ cost.cpu= (double) rows * avg_io_cost();
+ return cost;
+ }
/*
Everything below are methods that we implement in ha_example.cc.
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index 76708e80105..2a375a41200 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -460,6 +460,20 @@ static void init_federated_psi_keys(void)
#endif /* HAVE_PSI_INTERFACE */
/*
+ Federated doesn't need costs.disk_read_ratio as everything is on a
+ remote server and nothing is cached locally.
+*/
+
+static void federated_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ Setting disk_read_ratio to 1.0 ensures we are using the costs
+ from rnd_pos_time() and scan_time().
+ */
+ costs->disk_read_ratio= 1.0;
+}
+
+/*
Initialize the federated handler.
SYNOPSIS
@@ -485,6 +499,7 @@ int federated_db_init(void *p)
federated_hton->rollback= federated_rollback;
federated_hton->create= federated_create_handler;
federated_hton->drop_table= [](handlerton *, const char*) { return -1; };
+ federated_hton->update_optimizer_costs= federated_update_optimizer_costs;
federated_hton->flags= HTON_ALTER_NOT_SUPPORTED | HTON_NO_PARTITION;
/*
@@ -905,20 +920,11 @@ ha_federated::ha_federated(handlerton *hton,
:handler(hton, table_arg),
mysql(0), stored_result(0)
{
- optimizer_cache_cost= 1;
trx_next= 0;
bzero(&bulk_insert, sizeof(bulk_insert));
}
/*
- Federated doesn't need optimizer_cache_cost as everything is one a
- remote server and nothing is cached locally
-*/
-
-void ha_federated::set_optimizer_cache_cost(double cost)
-{}
-
-/*
Convert MySQL result set row to handler internal format
SYNOPSIS
diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h
index 3f25c2d7cb9..35e5f5c8215 100644
--- a/storage/federated/ha_federated.h
+++ b/storage/federated/ha_federated.h
@@ -180,20 +180,25 @@ public:
The reason for "records * 1000" is that such a large number forces
this to use indexes "
*/
- virtual double scan_time()
+
+ IO_AND_CPU_COST scan_time()
{
DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
- return (double)(stats.records*1000);
+ return
+ {
+ (double) (stats.mean_rec_length * stats.records)/IO_SIZE * avg_io_cost(),
+ 0
+ };
}
- virtual double read_time(uint index, uint ranges, ha_rows rows)
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{
- return rows2double(rows) + rows2double(ranges);
+ return { (double) rows * avg_io_cost(), 0 };
}
- virtual double rnd_pos_time(ha_rows rows)
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
{
- return rows2double(rows);
+ return { (double) (ranges + rows) * avg_io_cost(), 0 };
}
- virtual void set_optimizer_cache_cost(double cost);
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
/*
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index 22e22958a64..b93b7a94016 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -411,6 +411,20 @@ static select_handler*
create_federatedx_select_handler(THD* thd, SELECT_LEX *sel);
/*
+ Federated doesn't need costs.disk_read_ratio as everything is on a remote
+ server and nothing is cached locally.
+*/
+
+static void federatedx_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ Setting disk_read_ratio to 1.0 ensures we are using the costs
+ from rnd_pos_time() and scan_time().
+ */
+ costs->disk_read_ratio= 1.0;
+}
+
+/*
Initialize the federatedx handler.
SYNOPSIS
@@ -442,6 +456,7 @@ int federatedx_db_init(void *p)
federatedx_hton->flags= HTON_ALTER_NOT_SUPPORTED;
federatedx_hton->create_derived= create_federatedx_derived_handler;
federatedx_hton->create_select= create_federatedx_select_handler;
+ federatedx_hton->update_optimizer_costs= federatedx_update_optimizer_costs;
if (mysql_mutex_init(fe_key_mutex_federatedx,
&federatedx_mutex, MY_MUTEX_INIT_FAST))
@@ -841,17 +856,9 @@ ha_federatedx::ha_federatedx(handlerton *hton,
:handler(hton, table_arg),
txn(0), io(0), stored_result(0)
{
- optimizer_cache_cost= 1;
bzero(&bulk_insert, sizeof(bulk_insert));
}
-/*
- Federated doesn't need optimizer_cache_cost as everything is one a remote server and
- nothing is cached locally
-*/
-
-void ha_federatedx::set_optimizer_cache_cost(double cost)
-{}
/*
Convert MySQL result set row to handler internal format
diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h
index a7b7833eae7..9d9267e3abf 100644
--- a/storage/federatedx/ha_federatedx.h
+++ b/storage/federatedx/ha_federatedx.h
@@ -367,20 +367,24 @@ public:
The reason for "records * 1000" is that such a large number forces
this to use indexes "
*/
- double scan_time()
+ IO_AND_CPU_COST scan_time()
{
DBUG_PRINT("info", ("records %lu", (ulong) stats.records));
- return (double)(stats.records*1000);
+ return
+ {
+ (double) (stats.mean_rec_length * stats.records)/8192 * avg_io_cost(),
+ 0
+ };
}
- double read_time(uint index, uint ranges, ha_rows rows)
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
{
- return rows2double(rows) + rows2double(ranges);
+ return { (double) (ranges + rows) * avg_io_cost(), 0 };
}
- virtual double rnd_pos_time(ha_rows rows)
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
{
- return rows2double(rows);
+ return { (double) rows * avg_io_cost(), 0 };
}
- virtual void set_optimizer_cache_cost(double cost);
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
/*
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index f9b365cf91e..cc7dc79e508 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -42,6 +42,28 @@ static int heap_drop_table(handlerton *hton, const char *path)
return error == ENOENT ? -1 : error;
}
+/* See optimizer_costs.txt for how the following values were calculated */
+#define HEAP_ROW_NEXT_FIND_COST 8.0166e-06 // For table scan
+#define BTREE_KEY_NEXT_FIND_COST 0.00007739 // For binary tree scan
+#define HEAP_LOOKUP_COST 0.00016097 // Heap lookup cost
+
+static void heap_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ A lot of values are 0 as heap supports all needed xxx_time() functions
+ */
+ costs->disk_read_cost=0; // All data in memory
+ costs->disk_read_ratio= 0.0; // All data in memory
+ costs->key_next_find_cost= 0;
+ costs->key_copy_cost= 0; // Set in keyread_time()
+ costs->row_copy_cost= 2.334e-06; // This is small as its just a memcpy
+ costs->row_lookup_cost= 0; // Direct pointer
+ costs->row_next_find_cost= 0;
+ costs->key_lookup_cost= 0;
+ costs->key_next_find_cost= 0;
+ costs->index_block_copy_cost= 0;
+}
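+
+/*
+  Illustrative note (editorial sketch, not a calibrated number): with the
+  constants above, scan_time() below costs a full scan of a 1M row heap
+  table at roughly
+    1,000,000 * HEAP_ROW_NEXT_FIND_COST = 1,000,000 * 8.0166e-06 ~= 8.0
+  cost units, all CPU and no IO, which is why disk_read_cost and
+  disk_read_ratio can stay at 0.
+*/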
+
int heap_init(void *p)
{
handlerton *heap_hton;
@@ -53,6 +75,7 @@ int heap_init(void *p)
heap_hton->create= heap_create_handler;
heap_hton->panic= heap_panic;
heap_hton->drop_table= heap_drop_table;
+ heap_hton->update_optimizer_costs= heap_update_optimizer_costs;
heap_hton->flags= HTON_CAN_RECREATE;
return 0;
@@ -74,7 +97,6 @@ ha_heap::ha_heap(handlerton *hton, TABLE_SHARE *table_arg)
:handler(hton, table_arg), file(0), records_changed(0), key_stat_version(0),
internal_table(0)
{
- optimizer_cache_cost= 1.0;
}
/*
@@ -230,6 +252,41 @@ void ha_heap::update_key_stats()
}
+IO_AND_CPU_COST ha_heap::keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+{
+ KEY *key=table->key_info+index;
+ if (key->algorithm == HA_KEY_ALG_BTREE)
+ {
+ double lookup_cost;
+ lookup_cost= costs->key_cmp_cost * log2(stats.records+1);
+ return {0, ranges * lookup_cost + (rows-ranges) * BTREE_KEY_NEXT_FIND_COST };
+ }
+ else
+ {
+ return {0, (ranges * HEAP_LOOKUP_COST +
+ (rows-ranges) * BTREE_KEY_NEXT_FIND_COST) };
+ }
+}
+
+
+IO_AND_CPU_COST ha_heap::scan_time()
+{
+ return {0, (double) (stats.records+stats.deleted) * HEAP_ROW_NEXT_FIND_COST };
+}
+
+
+IO_AND_CPU_COST ha_heap::rnd_pos_time(ha_rows rows)
+{
+ /*
+ The row pointer is a direct pointer to the block. Thus almost instant
+ in practice.
+ Note that ha_rnd_pos_time() will add ROW_COPY_COST to this result
+ */
+ return { 0, 0 };
+}
+
+
int ha_heap::write_row(const uchar * buf)
{
int res;
diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h
index 81978daa5d7..74a0a00a04c 100644
--- a/storage/heap/ha_heap.h
+++ b/storage/heap/ha_heap.h
@@ -62,22 +62,13 @@ public:
const key_map *keys_to_use_for_scanning() { return &btree_keys; }
uint max_supported_keys() const { return MAX_KEY; }
uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; }
- double scan_time() override
- { return (double) (stats.records+stats.deleted) / 20.0+10; }
- double read_time(uint index, uint ranges, ha_rows rows) override
- { return (double) (rows +1)/ 20.0; }
- double keyread_time(uint index, uint ranges, ha_rows rows) override
- { return (double) (rows + ranges) / 20.0 ; }
- double avg_io_cost()
- { return 0.05; } /* 1/20 */
- double rnd_pos_time(ha_rows rows) override
- {
- return (double) rows/ 20.0;
- }
- /*
- Heap doesn't need optimizer_cache_cost as everything is in memory and
- it supports all needed _time() functions
- */
+ IO_AND_CPU_COST scan_time() override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
+ /* 0 for avg_io_cost ensures that there are no read-block calculations */
+ double avg_io_cost() override { return 0.0; }
+
int open(const char *name, int mode, uint test_if_locked);
int close(void);
void set_keys_for_scanning(void);
@@ -88,10 +79,6 @@ public:
ulonglong nb_desired_values,
ulonglong *first_value,
ulonglong *nb_reserved_values);
- void set_optimizer_cache_cost(double cost) override
- {
- optimizer_cache_cost= 1.0;
- }
int index_read_map(uchar * buf, const uchar * key, key_part_map keypart_map,
enum ha_rkey_function find_flag);
int index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map);
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 5d796cb9348..875253f931a 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -6501,6 +6501,7 @@ search_loop:
DBUG_EXECUTE_IF("bug14007649", DBUG_RETURN(n_rows););
+#ifdef NOT_USED
/* Do not estimate the number of rows in the range to over 1 / 2 of the
estimated rows in the whole table */
@@ -6515,6 +6516,10 @@ search_loop:
if (n_rows == 0)
n_rows= table_n_rows;
}
+#else
+ if (n_rows > table_n_rows)
+ n_rows= table_n_rows;
+#endif
DBUG_RETURN(n_rows);
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 7f88b42e08f..5fa31017d24 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -4079,6 +4079,26 @@ static int innodb_init_params()
DBUG_RETURN(0);
}
+
+/*********************************************************************//**
+Set up cost factors for InnoDB to be able to approximate how many
+ms different operations take. See the cost functions in handler.h for how
+the different variables are used */
+
+static void innobase_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ The following numbers were found by check_costs.pl when using 1M rows
+ and all rows are cached. See optimizer_costs.txt for details
+ */
+ costs->row_next_find_cost= 0.00007013;
+ costs->row_lookup_cost= 0.00076597;
+ costs->key_next_find_cost= 0.00009900;
+ costs->key_lookup_cost= 0.00079112;
+ costs->row_copy_cost= 0.00006087;
+}
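+
+/*
+  Rough editorial illustration (not the exact optimizer formula): with the
+  constants above, fetching 1000 rows through a secondary index lookup is
+  in the order of
+    1000 * (key_lookup_cost + row_lookup_cost + row_copy_cost)
+    = 1000 * (0.00079112 + 0.00076597 + 0.00006087) ~= 1.6
+  cost units; the generic handler cost functions combine these values
+  together with the engine's xxx_time() methods.
+*/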
+
+
/** Initialize the InnoDB storage engine plugin.
@param[in,out] p InnoDB handlerton
@return error code
@@ -4146,6 +4166,8 @@ static int innodb_init(void* p)
innobase_hton->prepare_commit_versioned
= innodb_prepare_commit_versioned;
+ innobase_hton->update_optimizer_costs= innobase_update_optimizer_costs;
+
innodb_remember_check_sysvar_funcs();
compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR);
@@ -5079,10 +5101,10 @@ ha_innobase::index_flags(
}
ulong flags= key == table_share->primary_key
- ? HA_CLUSTERED_INDEX : 0;
+ ? HA_CLUSTERED_INDEX : HA_KEYREAD_ONLY;
flags |= HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
- | HA_READ_RANGE | HA_KEYREAD_ONLY
+ | HA_READ_RANGE
| HA_DO_INDEX_COND_PUSHDOWN
| HA_DO_RANGE_FILTER_PUSHDOWN;
@@ -14336,13 +14358,15 @@ ha_innobase::estimate_rows_upper_bound()
DBUG_RETURN((ha_rows) estimate);
}
+
/*********************************************************************//**
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys.
@return estimated time measured in disk seeks */
-double
+#ifdef NOT_USED
+IO_AND_CPU_COST
ha_innobase::scan_time()
/*====================*/
{
@@ -14362,24 +14386,28 @@ ha_innobase::scan_time()
TODO: This will be further improved to return some approximate
estimate but that would also needs pre-population of stats
structure. As of now approach is in sync with MyISAM. */
- return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2);
+ return { (ulonglong2double(stats.data_file_length) / IO_SIZE * avg_io_cost()), 0.0 };
}
ulint stat_clustered_index_size;
-
+ IO_AND_CPU_COST cost;
ut_a(m_prebuilt->table->stat_initialized);
stat_clustered_index_size =
m_prebuilt->table->stat_clustered_index_size;
- return((double) stat_clustered_index_size);
+ cost.io= (double) stat_clustered_index_size * avg_io_cost();
+ cost.cpu= 0;
+ return(cost);
}
+#endif
/******************************************************************//**
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes.
@return estimated time measured in disk seeks */
+#ifdef NOT_USED
double
ha_innobase::read_time(
/*===================*/
@@ -14404,14 +14432,14 @@ ha_innobase::read_time(
return(time_for_scan);
}
- return(ranges + (double) rows / (double) total_rows * time_for_scan);
+ return(ranges * KEY_LOOKUP_COST + (double) rows / (double) total_rows * time_for_scan);
}
/******************************************************************//**
Calculate the time it takes to read a set of rows with primary key.
*/
-double
+IO_AND_CPU_COST
ha_innobase::rnd_pos_time(ha_rows rows)
{
ha_rows total_rows;
@@ -14419,15 +14447,18 @@ ha_innobase::rnd_pos_time(ha_rows rows)
/* Assume that the read time is proportional to the scan time for all
rows + at most one seek per range. */
- double time_for_scan = scan_time();
+ IO_AND_CPU_COST time_for_scan = scan_time();
if ((total_rows = estimate_rows_upper_bound()) < rows) {
return(time_for_scan);
}
-
- return((double) rows + (double) rows / (double) total_rows * time_for_scan);
+ double frac= (double) rows + (double) rows / (double) total_rows;
+ time_for_scan.io*= frac;
+ time_for_scan.cpu*= frac;
+ return(time_for_scan);
}
+#endif
/*********************************************************************//**
Calculates the key number used inside MySQL for an Innobase index.
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index ec466bbc30a..87e730dc137 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -105,12 +105,10 @@ public:
int close(void) override;
- double scan_time() override;
-
- double read_time(uint index, uint ranges, ha_rows rows) override;
-
+#ifdef NOT_USED
+ IO_AND_CPU_COST scan_time() override;
double rnd_pos_time(ha_rows rows) override;
-
+#endif
int write_row(const uchar * buf) override;
int update_row(const uchar * old_data, const uchar * new_data) override;
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index abe7834f36d..275df557dbd 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -1100,14 +1100,44 @@ ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
}
-double ha_maria::scan_time()
+/*
+ Update costs that are unique for this TABLE instance
+*/
+
+void ha_maria::update_optimizer_costs(OPTIMIZER_COSTS *costs)
{
- if (file->s->data_file_type == BLOCK_RECORD)
- return (ulonglong2double(stats.data_file_length - file->s->block_size) /
- file->s->block_size) + 2;
- return handler::scan_time();
+ /*
+ Default costs for Aria with BLOCK_FORMAT is the same as MariaDB default
+ costs.
+ */
+ if (file->s->data_file_type != BLOCK_RECORD)
+ {
+ /*
+ MyISAM format row lookups are slow as the row data is in a file that is
+ not cached. Costs taken from ha_myisam.cc
+ */
+ costs->row_next_find_cost= 0.000063539;
+ costs->row_lookup_cost= 0.001014818;
+ }
}
+
+IO_AND_CPU_COST ha_maria::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= handler::rnd_pos_time(rows);
+ /* file may be 0 if this is an internal temporary file that is not yet opened */
+ if (file && file->s->data_file_type != BLOCK_RECORD)
+ {
+ /*
+ Row data is not cached. costs.row_lookup_cost includes the cost of
+ reading the row from the file system (probably cached by the OS).
+ */
+ cost.io= 0;
+ }
+ return cost;
+}
+
+
/*
We need to be able to store at least 2 keys on an index page as the
splitting algorithms depends on this. (With only one key on a page
@@ -3788,6 +3818,12 @@ bool ha_maria::is_changed() const
return file->state->changed;
}
+static void aria_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ costs->rowid_copy_cost= 0.000001; // Just a short memcpy
+ costs->rowid_cmp_cost= 0.000001; // Just a short memcmp
+}
+
static int ha_maria_init(void *p)
{
@@ -3820,6 +3856,7 @@ static int ha_maria_init(void *p)
maria_hton->show_status= maria_show_status;
maria_hton->prepare_for_backup= maria_prepare_for_backup;
maria_hton->end_backup= maria_end_backup;
+ maria_hton->update_optimizer_costs= aria_update_optimizer_costs;
/* TODO: decide if we support Maria being used for log tables */
maria_hton->flags= (HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES |
diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h
index 6b4302145dd..38919d5c542 100644
--- a/storage/maria/ha_maria.h
+++ b/storage/maria/ha_maria.h
@@ -77,8 +77,6 @@ public:
{ return max_supported_key_length(); }
enum row_type get_row_type() const override final;
void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) override final;
- virtual double scan_time() override final;
-
int open(const char *name, int mode, uint test_if_locked) override;
int close(void) override final;
int write_row(const uchar * buf) override;
@@ -114,6 +112,8 @@ public:
int remember_rnd_pos() override final;
int restart_rnd_next(uchar * buf) override final;
void position(const uchar * record) override final;
+ void update_optimizer_costs(OPTIMIZER_COSTS *costs) override final;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override final;
int info(uint) override final;
int info(uint, my_bool);
int extra(enum ha_extra_function operation) override final;
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
index 144b10a86da..2f187090f53 100644
--- a/storage/maria/ma_pagecache.c
+++ b/storage/maria/ma_pagecache.c
@@ -3876,7 +3876,7 @@ restart:
{
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_ASSERT(0);
- return (uchar*) 0;
+ DBUG_RETURN((uchar*) 0);
}
}
/*
diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp
index 7787f8b83b5..a6693924d9e 100644
--- a/storage/mroonga/ha_mroonga.cpp
+++ b/storage/mroonga/ha_mroonga.cpp
@@ -13008,9 +13008,9 @@ int ha_mroonga::truncate()
DBUG_RETURN(error);
}
-double ha_mroonga::wrapper_scan_time()
+IO_AND_CPU_COST ha_mroonga::wrapper_scan_time()
{
- double res;
+ IO_AND_CPU_COST res;
MRN_DBUG_ENTER_METHOD();
MRN_SET_WRAP_SHARE_KEY(share, table->s);
MRN_SET_WRAP_TABLE_KEY(this, table);
@@ -13020,17 +13020,16 @@ double ha_mroonga::wrapper_scan_time()
DBUG_RETURN(res);
}
-double ha_mroonga::storage_scan_time()
+IO_AND_CPU_COST ha_mroonga::storage_scan_time()
{
MRN_DBUG_ENTER_METHOD();
- double time = handler::scan_time();
- DBUG_RETURN(time);
+ DBUG_RETURN(handler::scan_time());
}
-double ha_mroonga::scan_time()
+IO_AND_CPU_COST ha_mroonga::scan_time()
{
MRN_DBUG_ENTER_METHOD();
- double time;
+ IO_AND_CPU_COST time;
if (share->wrapper_mode)
{
time = wrapper_scan_time();
@@ -13040,51 +13039,87 @@ double ha_mroonga::scan_time()
DBUG_RETURN(time);
}
-double ha_mroonga::wrapper_read_time(uint index, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_mroonga::wrapper_rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST res;
+ MRN_DBUG_ENTER_METHOD();
+ MRN_SET_WRAP_SHARE_KEY(share, table->s);
+ MRN_SET_WRAP_TABLE_KEY(this, table);
+ res = wrap_handler->rnd_pos_time(rows);
+ MRN_SET_BASE_SHARE_KEY(share, table->s);
+ MRN_SET_BASE_TABLE_KEY(this, table);
+ DBUG_RETURN(res);
+}
+
+IO_AND_CPU_COST ha_mroonga::storage_rnd_pos_time(ha_rows rows)
{
- double res;
+ MRN_DBUG_ENTER_METHOD();
+ IO_AND_CPU_COST time = handler::rnd_pos_time(rows);
+ DBUG_RETURN(time);
+}
+
+
+IO_AND_CPU_COST ha_mroonga::rnd_pos_time(ha_rows rows)
+{
+ MRN_DBUG_ENTER_METHOD();
+ IO_AND_CPU_COST time;
+ if (share->wrapper_mode)
+ {
+ time = wrapper_rnd_pos_time(rows);
+ } else {
+ time = storage_rnd_pos_time(rows);
+ }
+ DBUG_RETURN(time);
+}
+
+
+IO_AND_CPU_COST ha_mroonga::wrapper_keyread_time(uint index, ulong ranges,
+ ha_rows rows, ulonglong blocks)
+{
+ IO_AND_CPU_COST res;
MRN_DBUG_ENTER_METHOD();
if (index < MAX_KEY) {
KEY *key_info = &(table->key_info[index]);
if (mrn_is_geo_key(key_info)) {
- res = handler::read_time(index, ranges, rows);
+ res = handler::keyread_time(index, ranges, rows, blocks);
DBUG_RETURN(res);
}
MRN_SET_WRAP_SHARE_KEY(share, table->s);
MRN_SET_WRAP_TABLE_KEY(this, table);
- res = wrap_handler->read_time(share->wrap_key_nr[index], ranges, rows);
+ res = wrap_handler->keyread_time(share->wrap_key_nr[index], ranges, rows, blocks);
MRN_SET_BASE_SHARE_KEY(share, table->s);
MRN_SET_BASE_TABLE_KEY(this, table);
} else {
MRN_SET_WRAP_SHARE_KEY(share, table->s);
MRN_SET_WRAP_TABLE_KEY(this, table);
- res = wrap_handler->read_time(index, ranges, rows);
+ res = wrap_handler->keyread_time(index, ranges, rows, blocks);
MRN_SET_BASE_SHARE_KEY(share, table->s);
MRN_SET_BASE_TABLE_KEY(this, table);
}
DBUG_RETURN(res);
}
-double ha_mroonga::storage_read_time(uint index, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_mroonga::storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks)
{
MRN_DBUG_ENTER_METHOD();
- double time = handler::read_time(index, ranges, rows);
+ IO_AND_CPU_COST time = handler::keyread_time(index, ranges, rows, blocks);
DBUG_RETURN(time);
}
-double ha_mroonga::read_time(uint index, uint ranges, ha_rows rows)
+IO_AND_CPU_COST ha_mroonga::keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks)
{
MRN_DBUG_ENTER_METHOD();
- double time;
+ IO_AND_CPU_COST time;
if (share->wrapper_mode)
{
- time = wrapper_read_time(index, ranges, rows);
+ time = wrapper_keyread_time(index, ranges, rows, blocks);
} else {
- time = storage_read_time(index, ranges, rows);
+ time = storage_keyread_time(index, ranges, rows, blocks);
}
DBUG_RETURN(time);
}
+
#ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING
const key_map *ha_mroonga::wrapper_keys_to_use_for_scanning()
{
diff --git a/storage/mroonga/ha_mroonga.hpp b/storage/mroonga/ha_mroonga.hpp
index 66767899e21..827714f5edb 100644
--- a/storage/mroonga/ha_mroonga.hpp
+++ b/storage/mroonga/ha_mroonga.hpp
@@ -531,8 +531,9 @@ public:
int end_bulk_insert() mrn_override;
int delete_all_rows() mrn_override;
int truncate() mrn_override;
- double scan_time() mrn_override;
- double read_time(uint index, uint ranges, ha_rows rows) mrn_override;
+ IO_AND_CPU_COST scan_time() mrn_override;
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) mrn_override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) mrn_override;
#ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING
const key_map *keys_to_use_for_scanning() mrn_override;
#endif
@@ -1106,10 +1107,12 @@ private:
int wrapper_truncate_index();
int storage_truncate();
int storage_truncate_index();
- double wrapper_scan_time();
- double storage_scan_time();
- double wrapper_read_time(uint index, uint ranges, ha_rows rows);
- double storage_read_time(uint index, uint ranges, ha_rows rows);
+ IO_AND_CPU_COST wrapper_scan_time();
+ IO_AND_CPU_COST storage_scan_time();
+ IO_AND_CPU_COST wrapper_rnd_pos_time(ha_rows rows);
+ IO_AND_CPU_COST storage_rnd_pos_time(ha_rows rows);
+ IO_AND_CPU_COST wrapper_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks);
+ IO_AND_CPU_COST storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks);
#ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING
const key_map *wrapper_keys_to_use_for_scanning();
const key_map *storage_keys_to_use_for_scanning();
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index a1de9edd997..bbae99ce2d3 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -804,6 +804,17 @@ ulong ha_myisam::index_flags(uint inx, uint part, bool all_parts) const
return flags;
}
+IO_AND_CPU_COST ha_myisam::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= handler::rnd_pos_time(rows);
+ /*
+ Row data is not cached. costs.row_lookup_cost includes the cost of
+ reading the row from the file system (probably cached by the OS).
+ */
+ cost.io= 0;
+ return cost;
+}
+
/* Name is here without an extension */
int ha_myisam::open(const char *name, int mode, uint test_if_locked)
@@ -2577,6 +2588,22 @@ static int myisam_drop_table(handlerton *hton, const char *path)
return mi_delete_table(path);
}
+
+void myisam_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /*
+ MyISAM row lookup costs are slow as the row data is not cached
+ The following numbers were found by check_costs.pl when using 1M rows
+ and all rows are cached. See optimizer_costs.txt
+ */
+ costs->row_next_find_cost= 0.000063539;
+ costs->row_lookup_cost= 0.001014818;
+ costs->key_next_find_cost= 0.000090585;
+ costs->key_lookup_cost= 0.000550142;
+ costs->key_copy_cost= 0.000015685;
+}
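+
+/*
+  Editorial note: with these constants a MyISAM row fetch
+  (row_lookup_cost ~ 0.00101) is roughly twice as expensive as a key
+  fetch (key_lookup_cost ~ 0.00055), so covering (index-only) plans are
+  favoured more strongly than for engines that cache row data themselves.
+*/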
+
+
static int myisam_init(void *p)
{
handlerton *hton;
@@ -2596,6 +2623,7 @@ static int myisam_init(void *p)
hton->create= myisam_create_handler;
hton->drop_table= myisam_drop_table;
hton->panic= myisam_panic;
+ hton->update_optimizer_costs= myisam_update_optimizer_costs;
hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES;
hton->tablefile_extensions= ha_myisam_exts;
mi_killed= mi_killed_in_mariadb;
diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h
index 3843004cc6e..55ce19494d9 100644
--- a/storage/myisam/ha_myisam.h
+++ b/storage/myisam/ha_myisam.h
@@ -82,14 +82,14 @@ class ha_myisam final : public handler
int index_first(uchar * buf);
int index_last(uchar * buf);
int index_next_same(uchar *buf, const uchar *key, uint keylen);
- int ft_init()
+ int ft_init() override
{
if (!ft_handler)
return 1;
ft_handler->please->reinit_search(ft_handler);
return 0;
}
- FT_INFO *ft_init_ext(uint flags, uint inx,String *key)
+ FT_INFO *ft_init_ext(uint flags, uint inx,String *key) override
{
return ft_init_search(flags,file,inx,
(uchar *)key->ptr(), key->length(), key->charset(),
@@ -102,6 +102,7 @@ class ha_myisam final : public handler
int remember_rnd_pos();
int restart_rnd_next(uchar *buf);
void position(const uchar *record);
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
int info(uint);
int extra(enum ha_extra_function operation);
int extra_opt(enum ha_extra_function operation, ulong cache_size);
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index d37636abab7..8a1a24d8b82 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -339,6 +339,32 @@ static void myrg_set_external_ref(MYRG_INFO *m_info, void *ext_ref_arg)
}
}
+IO_AND_CPU_COST ha_myisammrg::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= handler::rnd_pos_time(rows);
+ /*
+ Row data is not cached. costs.row_lookup_cost includes the cost of
+ reading the row from the file system (probably cached by the OS).
+ */
+ cost.io= 0;
+ return cost;
+}
+
+IO_AND_CPU_COST ha_myisammrg::keyread_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks)
+{
+ IO_AND_CPU_COST cost= handler::keyread_time(index, ranges, rows, blocks);
+ if (!blocks)
+ {
+ cost.io*= file->tables;
+ cost.cpu*= file->tables;
+ }
+ /* Add the cost of having to do a key lookup in all trees */
+ cost.cpu+= (file->tables-1) * (ranges * KEY_LOOKUP_COST);
+ return cost;
+}
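+
+/*
+  Worked example (editorial, derived from the code above): for a MERGE
+  table over 3 MyISAM tables and a single range, the underlying cost is
+  scaled by 3 when no block estimate is available, and 2 * KEY_LOOKUP_COST
+  is added for the extra B-tree dives in the two other index trees.
+*/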
+
/**
Open a MERGE parent table, but not its children.
@@ -1744,6 +1770,12 @@ int myisammrg_panic(handlerton *hton, ha_panic_function flag)
return myrg_panic(flag);
}
+static void myisammrg_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ myisam_update_optimizer_costs(costs);
+}
+
+
static int myisammrg_init(void *p)
{
handlerton *myisammrg_hton;
@@ -1759,7 +1791,7 @@ static int myisammrg_init(void *p)
myisammrg_hton->panic= myisammrg_panic;
myisammrg_hton->flags= HTON_NO_PARTITION;
myisammrg_hton->tablefile_extensions= ha_myisammrg_exts;
-
+ myisammrg_hton->update_optimizer_costs= myisammrg_update_optimizer_costs;
return 0;
}
diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h
index 6da327ec84b..6ccf29c7042 100644
--- a/storage/myisammrg/ha_myisammrg.h
+++ b/storage/myisammrg/ha_myisammrg.h
@@ -102,9 +102,17 @@ public:
uint max_supported_keys() const { return MI_MAX_KEY; }
uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; }
uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; }
- double scan_time()
- { return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; }
-
+ IO_AND_CPU_COST scan_time() override
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE +
+ file->tables) * avg_io_cost();
+ cost.cpu= records() * ROW_NEXT_FIND_COST;
+ return cost;
+ }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override;
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks) override;
int open(const char *name, int mode, uint test_if_locked);
int add_children_list(void);
int attach_children(void);
diff --git a/storage/oqgraph/ha_oqgraph.h b/storage/oqgraph/ha_oqgraph.h
index c8e175df616..d1f5a898ad7 100644
--- a/storage/oqgraph/ha_oqgraph.h
+++ b/storage/oqgraph/ha_oqgraph.h
@@ -74,9 +74,10 @@ public:
const char **bas_ext() const;
uint max_supported_keys() const { return MAX_KEY; }
uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; }
- double scan_time() { return (double) 1000000000; }
- double read_time(uint index, uint ranges, ha_rows rows)
- { return 1; }
+ IO_AND_CPU_COST scan_time()
+ { return { (double) 1000000000, (double) 1000000000 }; }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+ { return { (double) rows, (double) rows }; }
// Doesn't make sense to change the engine on a virtual table.
virtual bool can_switch_engines() { return false; }
diff --git a/storage/perfschema/ha_perfschema.h b/storage/perfschema/ha_perfschema.h
index eab97434265..20ed7448a1e 100644
--- a/storage/perfschema/ha_perfschema.h
+++ b/storage/perfschema/ha_perfschema.h
@@ -104,8 +104,10 @@ public:
ha_rows estimate_rows_upper_bound(void)
{ return HA_POS_ERROR; }
- double scan_time(void)
- { return 1.0; }
+ IO_AND_CPU_COST scan_time(void)
+ {
+ return {0.0, 1.0};
+ }
/**
Open a performance schema table.
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 2fcd729af6d..278732c6832 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -14627,15 +14627,18 @@ bool ha_rocksdb::use_read_free_rpl() const {
}
#endif // MARIAROCKS_NOT_YET
-double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) {
+IO_AND_CPU_COST ha_rocksdb::keyread_time(uint index, ulong ranges,
+ ha_rows rows,
+ ulonglong blocks) {
DBUG_ENTER_FUNC();
if (index != table->s->primary_key) {
/* Non covering index range scan */
- DBUG_RETURN(handler::read_time(index, ranges, rows));
+ DBUG_RETURN(handler::keyread_time(index, ranges, rows, blocks));
}
- DBUG_RETURN((rows / 20.0) + 1);
+ IO_AND_CPU_COST cost= {0, (rows / 20.0) + ranges };
+ DBUG_RETURN(cost);
}
void ha_rocksdb::print_error(int error, myf errflag) {
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 63bf7ffd602..d40fc539b0c 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -623,14 +623,17 @@ public:
bool sorted) override
MY_ATTRIBUTE((__warn_unused_result__));
- virtual double scan_time() override {
+ virtual IO_AND_CPU_COST scan_time() override
+ {
+ IO_AND_CPU_COST cost;
DBUG_ENTER_FUNC();
-
- DBUG_RETURN(
- static_cast<double>((stats.records + stats.deleted) / 20.0 + 10));
+ cost.io= 0;
+ cost.cpu= (stats.records + stats.deleted) * 0.001 + 1;
+ DBUG_RETURN(cost);
}
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges,
+ ha_rows rows, ulonglong blocks) override;
- virtual double read_time(uint, uint, ha_rows rows) override;
virtual void print_error(int error, myf errflag) override;
int open(const char *const name, int mode, uint test_if_locked) override
diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc
index f5a18094521..eb79d25630c 100644
--- a/storage/sequence/sequence.cc
+++ b/storage/sequence/sequence.cc
@@ -100,9 +100,7 @@ public:
int index_last(uchar *buf);
ha_rows records_in_range(uint inx, const key_range *start_key,
const key_range *end_key, page_range *pages);
- double scan_time() { return (double)nvalues(); }
- double read_time(uint index, uint ranges, ha_rows rows) { return (double)rows; }
- double keyread_time(uint index, uint ranges, ha_rows rows) { return (double)rows; }
+ double avg_io_cost() override { return 0.0; }
private:
void set(uchar *buf);
@@ -492,6 +490,13 @@ int ha_seq_group_by_handler::next_row()
DBUG_RETURN(0);
}
+static void sequence_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ costs->disk_read_ratio= 0.0; // No disk
+ costs->key_next_find_cost= costs->key_lookup_cost=
+ costs->key_copy_cost= costs->row_lookup_cost=
+ costs->row_copy_cost= 0.0000062391530550;
+}
/*****************************************************************************
Initialize the interface between the sequence engine and MariaDB
@@ -518,6 +523,7 @@ static int init(void *p)
hton->savepoint_set= hton->savepoint_rollback= hton->savepoint_release=
dummy_savepoint;
hton->create_group_by= create_group_by_handler;
+ hton->update_optimizer_costs= sequence_update_optimizer_costs;
return 0;
}
diff --git a/storage/sphinx/ha_sphinx.h b/storage/sphinx/ha_sphinx.h
index f03e9d8c797..0b3883f107c 100644
--- a/storage/sphinx/ha_sphinx.h
+++ b/storage/sphinx/ha_sphinx.h
@@ -72,14 +72,28 @@ public:
uint max_supported_key_length () const { return MAX_KEY_LENGTH; }
uint max_supported_key_part_length () const { return MAX_KEY_LENGTH; }
- #if MYSQL_VERSION_ID>50100
- virtual double scan_time () { return (double)( stats.records+stats.deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used
- #else
- virtual double scan_time () { return (double)( records+deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used
- #endif
-
- virtual double read_time(uint index, uint ranges, ha_rows rows)
- { return ranges + (double)rows/20.0 + 1; } ///< index read time estimate
+ IO_AND_CPU_COST scan_time ()
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= 0;
+ cost.cpu= (double) (stats.records+stats.deleted) * avg_io_cost();
+ return cost;
+ }
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= ranges;
+ cost.cpu= 0;
+ return cost;
+ }
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows)
+ {
+ IO_AND_CPU_COST cost;
+ cost.io= 0;
+ cost.cpu= 0;
+ return cost;
+ }
public:
int open ( const char * name, int mode, uint test_if_locked );
diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc
index b8d33e38c4e..d31f48e4c09 100644
--- a/storage/spider/ha_spider.cc
+++ b/storage/spider/ha_spider.cc
@@ -8508,38 +8508,47 @@ int ha_spider::truncate()
DBUG_RETURN(0);
}
-
-double ha_spider::scan_time()
+IO_AND_CPU_COST ha_spider::scan_time()
{
+ IO_AND_CPU_COST cost;
DBUG_ENTER("ha_spider::scan_time");
DBUG_PRINT("info",("spider this=%p", this));
- DBUG_PRINT("info",("spider scan_time = %.6f",
- share->scan_rate * share->stat.records * share->stat.mean_rec_length + 2));
- DBUG_RETURN(share->scan_rate * share->stat.records *
- share->stat.mean_rec_length + 2);
+ cost.io=0;
+ cost.cpu= (DISK_READ_COST * share->stat.records * share->stat.mean_rec_length);
+ DBUG_PRINT("info",("spider scan_time = %.6f", cost.cpu));
+ DBUG_RETURN(cost);
}
-double ha_spider::read_time(
- uint index,
- uint ranges,
- ha_rows rows
-) {
- DBUG_ENTER("ha_spider::read_time");
+IO_AND_CPU_COST ha_spider::rnd_pos_time(ha_rows rows)
+{
+ IO_AND_CPU_COST cost= { 0.0, 0.0}; // Row is in memory
+ return cost;
+}
+
+IO_AND_CPU_COST ha_spider::keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks)
+{
+ IO_AND_CPU_COST cost;
+ DBUG_ENTER("ha_spider::keyread_time");
DBUG_PRINT("info",("spider this=%p", this));
+
+ /*
+ Here we only calculate transfer costs. The normal handler cost functions
+ will add costs for accessing a row/key.
+ */
if (wide_handler->keyread)
{
- DBUG_PRINT("info",("spider read_time(keyread) = %.6f",
- share->read_rate * table->key_info[index].key_length *
- rows / 2 + 2));
- DBUG_RETURN(share->read_rate * table->key_info[index].key_length *
- rows / 2 + 2);
+ cost.io= 0;
+ cost.cpu= DISK_READ_COST * rows * table->key_info[index].key_length;
} else {
- DBUG_PRINT("info",("spider read_time = %.6f",
- share->read_rate * share->stat.mean_rec_length * rows + 2));
- DBUG_RETURN(share->read_rate * share->stat.mean_rec_length * rows + 2);
+ cost.io= 0;
+ cost.cpu= DISK_READ_COST * rows * share->stat.mean_rec_length;
}
+ DBUG_PRINT("info",("spider scan_time(keyread) = %.6f", cost.cpu));
+ DBUG_RETURN(cost);
}
+
const key_map *ha_spider::keys_to_use_for_scanning()
{
DBUG_ENTER("ha_spider::keys_to_use_for_scanning");
diff --git a/storage/spider/ha_spider.h b/storage/spider/ha_spider.h
index 4dffdf78553..1c5c867b2f8 100644
--- a/storage/spider/ha_spider.h
+++ b/storage/spider/ha_spider.h
@@ -445,12 +445,10 @@ public:
);
int delete_all_rows();
int truncate();
- double scan_time();
- double read_time(
- uint index,
- uint ranges,
- ha_rows rows
- );
+ IO_AND_CPU_COST scan_time();
+ IO_AND_CPU_COST rnd_pos_time(ha_rows rows);
+ IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows,
+ ulonglong blocks);
const key_map *keys_to_use_for_scanning();
ha_rows estimate_rows_upper_bound();
void print_error(
diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test
index 60c0ad42921..02a4b803a89 100644
--- a/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test
+++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test
@@ -2,6 +2,10 @@
--echo # MDEV-27172 Prefix indices on Spider tables may lead to wrong query results
--echo #
+# Disable the test for ps-protocol as the general log has a different number
+# of commands for --ps
+--source include/no_protocol.inc
+
--disable_query_log
--disable_result_log
--source ../../t/test_init.inc
diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc
index 03624d475dc..cd158c6102d 100644
--- a/storage/spider/spd_table.cc
+++ b/storage/spider/spd_table.cc
@@ -6520,6 +6520,25 @@ int spider_panic(
DBUG_RETURN(0);
}
+static void spider_update_optimizer_costs(OPTIMIZER_COSTS *costs)
+{
+ /* Assume a 1 Gigabit network */
+ costs->disk_read_cost= IO_SIZE/(1000000000.0/8)*1000.00000;
+ costs->index_block_copy_cost= 0; // Not used
+
+ /*
+ The following costs are copied from ha_innodb.cc
+ The assumption is that the default storage engine used with Spider is
+ InnoDB.
+ */
+ costs->row_next_find_cost= 0.00007013;
+ costs->row_lookup_cost= 0.00076597;
+ costs->key_next_find_cost= 0.00009900;
+ costs->key_lookup_cost= 0.00079112;
+ costs->row_copy_cost= 0.00006087;
+}
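+
+/*
+  Worked example (editorial; assumes IO_SIZE is 4096 bytes): the
+  disk_read_cost expression above evaluates to
+    4096 / (1000000000 / 8) * 1000 ~= 0.033
+  per block, i.e. the time to transfer one IO_SIZE block over a
+  1 Gbit/s link.
+*/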
+
+
int spider_db_init(
void *p
) {
@@ -6563,6 +6582,7 @@ int spider_db_init(
spider_hton->show_status = spider_show_status;
spider_hton->create_group_by = spider_create_group_by_handler;
spider_hton->table_options= spider_table_option_list;
+ spider_hton->update_optimizer_costs= spider_update_optimizer_costs;
if (my_gethwaddr((uchar *) addr))
{
diff --git a/tests/check_costs.pl b/tests/check_costs.pl
new file mode 100755
index 00000000000..0e3b538b65b
--- /dev/null
+++ b/tests/check_costs.pl
@@ -0,0 +1,1005 @@
+#!/usr/bin/env perl
+
+# Copyright (C) 2022 MariaDB Foundation
+# Use is subject to license terms
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+# This is a test that runs queries to measure whether the MariaDB cost
+# calculations are reasonable.
+#
+# The following tests are run:
+# - Full table scan of a table
+# - Range scan of the table
+# - Index scan of the table
+#
+# The output can be used to fine-tune the optimizer cost variables.
+#
+# The table in question is similar to the 'lineitem' table used by DBT3.
+# It has 16 fields and can be regarded as an 'average' kind of table.
+# The number of fields and the record length play only a small role when
+# comparing index scans and table scans.
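+#
+# Example invocation (illustrative; connection options are placeholders):
+#
+#   ./check_costs.pl --user=test --db=test --rows=1000000 \
+#       --engine=innodb --test-runs=2 --print-analyze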
+
+##################### Standard benchmark inits ##############################
+
+use DBI;
+use Getopt::Long;
+use Benchmark ':hireswallclock';
+
+package main;
+
+$opt_rows=1000000;
+$opt_test_runs= 2; # Run each test 2 times and take the average
+$opt_verbose="";
+$opt_host="";
+$opt_db="test";
+$opt_user="test";
+$opt_password="";
+$opt_socket=undef;
+$opt_skip_drop= undef;
+$opt_skip_create= undef;
+$opt_init_query= undef;
+$opt_print_analyze= undef;
+$opt_where_check= undef;
+$opt_engine=undef;
+$opt_comment=undef;
+$opt_table_suffix=undef;
+$opt_table_name= undef;
+$opt_gprof= undef;
+$opt_all_tests=undef;
+$opt_ratios= undef;
+$opt_mysql= undef;
+$has_force_index=1;
+
+@arguments= @ARGV;
+
+GetOptions("host=s","user=s","password=s", "rows=i","test-runs=i","socket=s",
+ "db=s", "table-name=s", "skip-drop","skip-create",
+ "init-query=s","engine=s","comment=s",
+ "gprof", "one-test=s",
+ "mysql", "all-tests", "ratios", "where-check",
+ "print-analyze", "verbose") ||
+ die "Aborted";
+
+$Mysql::db_errstr=undef; # Ignore warnings from these
+
+my ($base_table, $table, $dbh, $where_cost, $real_where_cost, $perf_ratio);
+
+if (!$opt_mysql)
+{
+ @engines= ("aria","innodb","myisam","heap");
+}
+else
+{
+ @engines= ("innodb","myisam","heap");
+}
+
+# Special handling for some engines
+
+$no_force= 0;
+
+if (defined($opt_engine))
+{
+ if (lc($opt_engine) eq "archive")
+ {
+ $has_force_index= 0; # Skip tests with force index
+ }
+}
+
+
+if (defined($opt_gprof) || defined($opt_one_test))
+{
+ die "one_test must be defined when --gprof is used"
+ if (!defined($opt_one_test));
+ die "engine must be defined when --gprof or --one-test is used"
+ if (!defined($opt_engine));
+ die "function '$opt_one_test' does not exist\n"
+ if (!defined(&{$opt_one_test}));
+}
+
+# We add engine_name to the table name later
+
+$opt_table_name="check_costs" if (!defined($opt_table_name));
+$base_table="$opt_db.$opt_table_name";
+
+####
+#### Start timing and start test
+####
+
+$|= 1; # Autoflush
+if ($opt_verbose)
+{
+ $opt_print_analyze= 1;
+}
+
+####
+#### Create the table
+####
+
+my %attrib;
+
+$attrib{'PrintError'}=0;
+
+if (defined($opt_socket))
+{
+ $attrib{'mariadb_socket'}=$opt_socket;
+}
+
+$dbh = DBI->connect("DBI:MariaDB:$opt_db:$opt_host",
+ $opt_user, $opt_password,\%attrib) || die $DBI::errstr;
+
+print_mariadb_version();
+print "Server options: $opt_comment\n" if (defined($opt_comment));
+print "Running tests with $opt_rows rows\n";
+
+print "Program arguments:\n";
+for ($i= 0 ; $i <= $#arguments; $i++)
+{
+ my $arg=$arguments[$i];
+ if ($arg =~ / /)
+ {
+ if ($arg =~ /([^ =]*)=(.*)/)
+ {
+ print "$1=\"$2\" ";
+ }
+ else
+ {
+ print "\"$arg\"" . " ";
+ }
+ }
+ else
+ {
+ print $arguments[$i] . " ";
+ }
+}
+print "\n\n";
+
+@test_names=
+ ("table scan no where", "table scan simple where",
+ "table scan where no match", "table scan complex where", "table scan",
+ "index scan", "index scan 4 parts", "range scan", "eq_ref_index_join",
+ "eq_ref_cluster_join", "eq_ref_join", "eq_ref_btree");
+$where_tests=3; # Number of where tests to be compared with test[0]
+
+if ($opt_mysql)
+{
+ create_seq_table();
+}
+
+
+if ($opt_engine || defined($opt_one_test))
+{
+ test_engine(0, $opt_engine);
+}
+else
+{
+ my $i;
+ undef($opt_skip_create);
+ for ($i= 0 ; $i <= $#engines; $i++)
+ {
+ test_engine($i, $engines[$i]);
+
+ if ($i > 0 && $opt_ratios)
+ {
+ print "\n";
+ my $j;
+
+ print "Ratios $engines[$i] / $engines[0]\n";
+ for ($j= $where_tests+1 ; $j <= $#test_names ; $j++)
+ {
+ if ($res[$i][$j])
+ {
+ my $cmp_cost= $res[0][$j]->{'cost'} - $res[0][$j]->{'where_cost'};
+ my $cmp_time= $res[0][$j]->{'time'};
+ my $cur_cost= $res[$i][$j]->{'cost'} - $res[$i][$j]->{'where_cost'};
+ my $cur_time= $res[$i][$j]->{'time'};
+
+ printf "%14.14s cost: %6.4f time: %6.4f cost_multiplier: %6.4f\n",
+ $test_names[$j],
+ $cur_cost / $cmp_cost,
+ $cur_time / $cmp_time,
+ ($cmp_cost * ($cur_time / $cmp_time))/$cur_cost;
+ }
+000000 }
+ }
+# if ($i + 1 <= $#engines)
+ {
+ print "-------------------------\n\n";
+ }
+ }
+ print_totals();
+}
+
+$dbh->do("drop table if exists $table") if (!defined($opt_skip_drop));
+$dbh->disconnect; $dbh=0; # Close handler
+exit(0);
+
+
+sub test_engine()
+{
+ my ($i, $engine)= @_;
+ my ($cur_rows);
+
+ setup($opt_init_query);
+ setup_engine($engine);
+ $table= $base_table . "_$engine";
+ if (!defined($opt_skip_create))
+ {
+ my $index_type="";
+
+ # We should use a btree index with heap to get range scans
+ $index_type= "using btree" if (lc($engine) eq "heap");
+
+ print "Creating table $table of type $engine\n";
+ $dbh->do("drop table if exists $table");
+ $dbh->do("create table $table (
+ `l_orderkey` int(11) NOT NULL,
+ `l_partkey` int(11) DEFAULT NULL,
+ `l_suppkey` int(11) DEFAULT NULL,
+ `l_linenumber` int(11) NOT NULL,
+ `l_extra` int(11) NOT NULL,
+ `l_quantity` double DEFAULT NULL,
+ `l_extendedprice` double DEFAULT NULL,
+ `l_discount` double DEFAULT NULL,
+ `l_tax` double DEFAULT NULL,
+ `l_returnflag` char(1) DEFAULT NULL,
+ `l_linestatus` char(1) DEFAULT NULL,
+ `l_shipDATE` date DEFAULT NULL,
+ `l_commitDATE` date DEFAULT NULL,
+ `l_receiptDATE` date DEFAULT NULL,
+ `l_shipinstruct` char(25) DEFAULT NULL,
+ `l_shipmode` char(10) DEFAULT NULL,
+ `l_comment` varchar(44) DEFAULT NULL,
+ PRIMARY KEY (`l_orderkey`),
+ UNIQUE (`l_linenumber`),
+ UNIQUE (`l_extra`) $index_type,
+ KEY `l_suppkey` $index_type (l_suppkey, l_partkey),
+ KEY `long_suppkey` $index_type
+ (l_partkey, l_suppkey, l_linenumber, l_extra) )
+ ENGINE= $engine")
+ or die "Got error on CREATE TABLE: $DBI::errstr";
+ }
+ $cur_rows= get_row_count();
+ if ($cur_rows == 0 || !defined($opt_skip_create))
+ {
+ $dbh->do("insert into $table select
+ seq, seq/10, seq, seq, seq, seq, seq, mod(seq,10)*10,
+ 0, 'a','b',
+ date_add('2000-01-01', interval seq/500 day),
+ date_add('2000-01-10', interval seq/500 day),
+ date_add('2000-01-20', interval seq/500 day),
+ left(md5(seq),25),
+ if(seq & 1,'mail','ship'),
+ repeat('a',mod(seq,40))
+ from seq_1_to_$opt_rows")
+ or die "Got error on INSERT: $DBI::errstr";
+
+ $sth= $dbh->do("analyze table $table")
+ or die "Got error on 'analyze table: " . $dbh->errstr . "\n";
+ }
+ else
+ {
+ $opt_rows= $cur_rows;
+ die "Table $table is empty. Please run without --skip-create"
+ if ($opt_rows == 0);
+ print "Reusing old table $table, which has $opt_rows rows\n";
+ }
+
+ if (!$opt_mysql)
+ {
+ $where_cost=get_variable("optimizer_where_cost");
+ if (defined($where_cost))
+ {
+ # Calculate cost of where once. Must be done after table is created
+ $real_where_cost= get_where_cost();
+ $perf_ratio= $real_where_cost/$where_cost;
+ printf "Performance ratio compared to base computer: %6.4f\n",
+ $perf_ratio;
+ }
+ print "\n";
+ }
+ else
+ {
+ $where_cost=0.1; # mysql 'm_row_evaluate_cost'
+ }
+
+
+ if (defined($opt_one_test))
+ {
+ if (defined($opt_gprof))
+ {
+ # Argument is the name of the test function
+ test_with_gprof($opt_one_test, 10);
+ return;
+ }
+ $opt_one_test->();
+ return;
+ }
+
+ if ($opt_where_check)
+ {
+ $res[$i][0]= table_scan_without_where(0);
+ $res[$i][1]= table_scan_with_where(1);
+ $res[$i][2]= table_scan_with_where_no_match(2);
+ $res[$i][3]= table_scan_with_complex_where(3);
+ }
+ $res[$i][4]= table_scan_without_where_analyze(4);
+ $res[$i][5]= index_scan(5);
+ $res[$i][6]= index_scan_4_parts(6) if ($opt_all_tests);
+ $res[$i][7]= range_scan(7);
+ $res[$i][8]= eq_ref_index_join(8);
+ $res[$i][9]= eq_ref_clustered_join(9);
+ $res[$i][10]= eq_ref_join(10);
+ $res[$i][11]= eq_ref_join_btree(11);
+
+ if ($opt_where_check)
+ {
+ printf "Variable optimizer_where_cost: cur: %6.4f real: %6.4f prop: %6.4f\n",
+ $where_cost, $real_where_cost, $perf_ratio;
+ print "Ratio of WHERE costs compared to scan without a WHERE\n";
+ for ($j= 1 ; $j <= $where_tests ; $j++)
+ {
+ print_where_costs($i,$j,0);
+ }
+ print "\n";
+ }
+
+ print "Cost/time ratio for different scans types\n";
+ for ($j= $where_tests+1 ; $j <= $#test_names ; $j++)
+ {
+ if ($res[$i][$j])
+ {
+ print_costs($test_names[$j], $res[$i][$j]);
+ }
+ }
+}
+
+
+sub print_costs($;$)
+{
+ my ($name, $cur_res)= @_;
+
+ # Cost without where clause
+ my $cur_cost= $cur_res->{'cost'} - $cur_res->{'where_cost'};
+ my $cur_time= $cur_res->{'time'};
+
+ printf "%-20.20s cost: %9.4f time: %9.4f cost/time: %8.4f\n",
+ $name,
+ $cur_cost, $cur_time, $cur_cost/$cur_time;
+}
+
+sub print_where_costs()
+{
+ my ($index, $cmp, $base)= @_;
+
+ my $cmp_time= $res[$index][$cmp]->{'time'};
+ my $base_time= $res[$index][$base]->{'time'};
+
+ printf "%-30.30s time: %6.4f\n", $test_names[$cmp], $cmp_time / $base_time;
+}
+
+
+# Used to setup things like optimizer_switch or optimizer_cache_hit_ratio
+
+sub setup()
+{
+ my ($query)= @_;
+ my ($sth,$query);
+
+ $sth= $dbh->do("flush tables") ||
+ die "Got error on 'flush tables': " . $dbh->errstr . "\n";
+ if (defined($query))
+ {
+ $sth= $dbh->do("$query") ||
+ die "Got error on '$query': " . $dbh->errstr . "\n";
+ }
+
+ # Set variables that may interfere with timings
+ $query= "set \@\@optimizer_switch='index_condition_pushdown=off'";
+ $sth= $dbh->do($query) ||
+ die "Got error on '$query': " . $dbh->errstr . "\n";
+}
+
+
+sub setup_engine()
+{
+ my ($engine)= @_;
+ my ($sth,$query);
+
+ if (!$opt_mysql)
+ {
+ # Set variables that may interfere with timings
+ $query= "set global $engine.optimizer_disk_read_ratio=0";
+ $sth= $dbh->do($query) ||
+ die "Got error on '$query': " . $dbh->errstr . "\n";
+ }
+}
+
+sub create_seq_table
+{
+ my $name= "seq_1_to_$opt_rows";
+ my $i;
+ print "Creating $name\n";
+ $dbh->do("drop table if exists $name") ||
+ die "Error on drop: " . $dbh->errstr ."\n";
+ $dbh->do("create table $name (seq int(11) not null) engine=heap")
+ || die "Error on create: " . $dbh->errstr ."\n";
+ for ($i= 1 ; $i < $opt_rows ; $i+=10)
+ {
+ $dbh->do("insert into $name values
+ ($i),($i+1),($i+2),($i+3),($i+4),($i+5),($i+6),($i+7),($i+8),($i+9)") || die "Error on insert";
+ }
+}
+
+
+
+##############################################################################
+# Query functions
+##############################################################################
+
+# Calculate the cost of the WHERE clause
+
+sub table_scan_without_where()
+{
+ my ($query_id)= @_;
+ return run_query($test_names[$query_id],
+ "table_scan", "ALL", $opt_rows,
+"select sum(l_quantity) from $table");
+}
+
+sub table_scan_with_where()
+{
+ my ($query_id)= @_;
+ return run_query($test_names[$query_id],
+ "table_scan", "ALL", $opt_rows,
+"select sum(l_quantity) from $table where l_commitDate >= '2000-01-01' and l_tax >= 0.0");
+}
+
+sub table_scan_with_where_no_match()
+{
+ my ($query_id)= @_;
+ return run_query($test_names[$query_id],
+ "table_scan", "ALL", $opt_rows,
+"select sum(l_quantity) from $table where l_commitDate >= '2000-01-01' and l_tax > 0.0 /* NO MATCH */");
+}
+
+
+sub table_scan_with_complex_where()
+{
+ my ($query_id)= @_;
+ return run_query($test_names[$query_id],
+ "table_scan", "ALL", $opt_rows,
+"select sum(l_quantity) from $table where l_commitDate >= '2000-01-01' and l_quantity*l_extendedprice-l_discount+l_tax > 0.0");
+}
+
+# Calculate the time spent on table accesses (done with the analyze statement)
+
+# Table scan
+
+sub table_scan_without_where_analyze()
+{
+ my ($query_id)= @_;
+ return run_query_with_analyze($test_names[$query_id],
+ "table_scan", "ALL", $opt_rows,
+"select sum(l_quantity) from $table");
+}
+
+# Index scan with 2 key parts
+
+sub index_scan()
+{
+ my ($query_id)= @_;
+ return 0 if (!$has_force_index);
+ return run_query_with_analyze($test_names[$query_id],
+ "index_scan", "index", $opt_rows,
+"select count(*) from $table force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0");
+}
+
+# Index scan with 4 key parts
+# This is to check how the number of key parts affects the timings
+
+sub index_scan_4_parts()
+{
+ my ($query_id)= @_;
+ return 0 if (!$has_force_index);
+ return run_query_with_analyze($test_names[$query_id],
+ "index_scan_4_parts", "index", $opt_rows,
+"select count(*) from $table force index (long_suppkey) where l_linenumber >= 0 and l_extra >0");
+}
+
+sub range_scan()
+{
+ my ($query_id)= @_;
+ return 0 if (!$has_force_index);
+ return run_query_with_analyze($test_names[$query_id],
+ "range_scan", "range", $opt_rows,
+"select sum(l_orderkey) from $table force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0");
+}
+
+sub eq_ref_index_join()
+{
+ my ($query_id)= @_;
+ return run_query_with_analyze($test_names[$query_id],
+ "eq_ref_index_join", "eq_ref", 1,
+"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_linenumber");
+}
+
+sub eq_ref_clustered_join()
+{
+ my ($query_id)= @_;
+ return run_query_with_analyze($test_names[$query_id],
+ "eq_ref_cluster_join", "eq_ref", 1,
+"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_orderkey");
+}
+
+sub eq_ref_join()
+{
+ my ($query_id)= @_;
+ return run_query_with_analyze($test_names[$query_id],
+ "eq_ref_join", "eq_ref", 1,
+"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_linenumber and l_partkey >= 0");
+}
+
+sub eq_ref_join_btree()
+{
+ my ($query_id)= @_;
+ return run_query_with_analyze($test_names[$query_id],
+ "eq_ref_btree", "eq_ref", 1,
+"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_extra and l_partkey >= 0");
+}
+
+
+# Calculate the cost of a basic where clause
+# This can be used to find out the speed of the current computer compared
+# to the reference computer on which the costs were calibrated.
+
+sub get_where_cost()
+{
+ my ($loop);
+ $loop=10000000;
+ # Return time in microseconds for one where (= optimizer_where_cost)
+ return query_time("select benchmark($loop, l_commitDate >= '2000-01-01' and l_tax >= 0.0) from $table limit 1")/$loop;
+}
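+
+# Illustrative numbers (editorial): if the 10M benchmark() evaluations take
+# 0.32 seconds, query_time() returns ~320000 microseconds and this function
+# returns ~0.032 microseconds per WHERE evaluation, the value to compare
+# against optimizer_where_cost.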
+
+
+# Run a query to be able to calculate the costs of filter
+
+sub cost_of_filtering()
+{
+ my ($query, $cost1, $cost2);
+ do_query("set \@\@max_rowid_filter_size=10000000," .
+ "optimizer_switch='rowid_filter=on',".
+ "\@\@optimizer_scan_setup_cost=1000000");
+ do_query("set \@old_cost=\@\@aria.OPTIMIZER_ROW_LOOKUP_COST");
+ do_query("set global aria.OPTIMIZER_ROW_LOOKUP_COST=1");
+ do_query("flush tables");
+ $cost1= run_query_with_analyze("range", "range", "range", 500000,
+ "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000");
+ $cost2= run_query_with_analyze("range-all", "range-all", "range|filter", 500000,
+ "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000 and l_linenumber between 1 and 500000");
+ $cost3= run_query_with_analyze("range-none","range-none", "range|filter", 500000,
+ "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000 and l_linenumber between 500000 and 1000000");
+ do_query("set global aria.OPTIMIZER_ROW_LOOKUP_COST=\@old_cost");
+ do_query("flush tables");
+ print_costs("range", $cost1);
+ print_costs("filter-all", $cost2);
+ print_costs("filter-none", $cost3);
+}
+
+sub gprof_cost_of_filtering()
+{
+ $cost2= run_query_with_analyze("gprof","range-all", "range|filter", 500000,
+ "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000 and l_linenumber between 1 and 500000");
+}
+
+
+###############################################################################
+# Help functions for running the queries
+###############################################################################
+
+
+# Run query and return time for query in microseconds
+
+sub query_time()
+{
+ my ($query)= @_;
+ my ($start_time,$end_time,$time,$ms,$sth,$row);
+
+ $start_time= new Benchmark;
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $end_time=new Benchmark;
+ $row= $sth->fetchrow_arrayref();
+ $sth=0;
+
+ $time= timestr(timediff($end_time, $start_time),"nop");
+ $time =~ /([\d.]*)/;
+ return $1*1000000.0;
+}
+
+#
+# Run a query and compare the clock time
+#
+
+sub run_query()
+{
+ my ($full_name, $name, $type, $expected_rows, $query)= @_;
+ my ($start_time,$end_time,$sth,@row,%res,$i,$optimizer_rows);
+ my ($extra, $last_type, $adjust_cost, $ms);
+ $adjust_cost=1.0;
+
+ print "Timing full query: $full_name\n$query\n";
+
+ $sth= $dbh->prepare("explain $query") || die "Got error on 'explain $query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on 'explain $query': " . $dbh->errstr . "\n";
+
+ print "explain:\n";
+ while ($row= $sth->fetchrow_arrayref())
+ {
+ print $row->[0];
+ for ($i= 1 ; $i < @$row; $i++)
+ {
+ print " " . $row->[$i] if (defined($row->[$i]));
+ }
+ print "\n";
+
+ $extra= $row->[@$row-1];
+ $last_type= $row->[3];
+ $optimizer_rows= $row->[8];
+ }
+ if ($last_type ne $type &&
+ ($type ne "index" || !($extra =~ /Using index/)))
+ {
+ print "Warning: Wrong scan type: '$last_type', expected '$type'\n";
+ }
+
+ if ($expected_rows >= 0 &&
+ (abs($optimizer_rows - $expected_rows)/$expected_rows) > 0.1)
+ {
+ printf "Warning: Expected $expected_rows instead of $optimizer_rows from EXPLAIN. Adjusting costs\n";
+ $adjust_cost= $expected_rows / $optimizer_rows;
+ }
+
+ # Do one query to fill the cache
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $end_time=new Benchmark;
+ $row= $sth->fetchrow_arrayref();
+ $sth=0;
+
+ # Run query for real
+ $start_time= new Benchmark;
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $end_time=new Benchmark;
+ $row= $sth->fetchrow_arrayref();
+ $sth=0;
+
+ $time= timestr(timediff($end_time, $start_time),"nop");
+ $time =~ /([\d.]*)/;
+ $ms= $1*1000.0;
+
+ $query= "show status like 'last_query_cost'";
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";;
+ $row= $sth->fetchrow_arrayref();
+ $sth=0;
+ $cost= $row->[1] * $adjust_cost;
+ printf "%10s time: %10.10s ms cost: %6.4f", $name, $ms, $cost;
+ if ($adjust_cost != 1.0)
+ {
+ printf " (was %6.4f)", $row->[1];
+ }
+ print "\n\n";
+
+ $res{'cost'}= $cost;
+ $res{'time'}= $ms;
+ return \%res;
+}
+
+#
+# Run a query and compare the table access time from analyze statement
+# The cost works for queries with one or two tables!
+#
+
+sub run_query_with_analyze()
+{
+ my ($full_name,$name, $type, $expected_rows, $query)= @_;
+ my ($start_time,$end_time,$sth,@row,%res,$i,$j);
+ my ($optimizer_rows, $optimizer_rows_first);
+ my ($adjust_cost, $ms, $second_ms, $analyze, $local_where_cost);
+ my ($extra, $last_type, $tot_ms, $found_two_tables);
+
+ $found_two_tables= 0;
+ $adjust_cost=1.0;
+ if (!$opt_mysql)
+ {
+ $local_where_cost= $where_cost/1000 * $opt_rows;
+ }
+ else
+ {
+ $local_where_cost= $where_cost * $opt_rows;
+ }
+ $optimizer_rows_first= undef;
+
+ print "Timing table access for query: $full_name\n$query\n";
+
+ $sth= $dbh->prepare("explain $query") || die "Got error on 'explain $query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on 'explain $query': " . $dbh->errstr . "\n";
+
+ print "explain:\n";
+ if (!$opt_mysql)
+ {
+ $type_pos= 3;
+ $row_pos= 8;
+ }
+ else
+ {
+ $type_pos= 4;
+ $row_pos= 9;
+ }
+
+ $j= 0;
+ while ($row= $sth->fetchrow_arrayref())
+ {
+ $j++;
+ print $row->[0];
+ for ($i= 1 ; $i < @$row; $i++)
+ {
+ print " " . $row->[$i] if (defined($row->[$i]));
+ # print " X" if (!defined($row->[$i]));
+ }
+ print "\n";
+
+ $extra= $row->[@$row-1];
+ $last_type= $row->[$type_pos];
+ if (!defined($optimizer_rows_first))
+ {
+ $optimizer_rows_first= $row->[$row_pos];
+ }
+ $optimizer_rows= $row->[$row_pos];
+ }
+ $found_two_tables= 1 if ($j > 1);
+
+ if ($last_type ne $type &&
+ ($type ne "index" || !($extra =~ /Using index/)))
+ {
+ print "Warning: Wrong scan type: '$last_type', expected '$type'\n";
+ }
+ if ($expected_rows >= 0 &&
+ (abs($optimizer_rows - $expected_rows)/$expected_rows) > 0.1)
+ {
+ printf "Warning: Expected $expected_rows instead of $optimizer_rows from EXPLAIN. Adjusting costs\n";
+ $adjust_cost= $expected_rows / $optimizer_rows;
+ }
+
+ # Do one query to fill the cache
+ if (!defined($opt_gprof))
+ {
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $row= $sth->fetchrow_arrayref();
+ $sth=0;
+ }
+
+ # Run the query through analyze statement
+ $tot_ms=0;
+ if (!$opt_mysql)
+ {
+ for ($i=0 ; $i < $opt_test_runs ; $i++)
+ {
+ my ($j);
+ $sth= $dbh->prepare("analyze format=json $query" ) || die "Got error on 'analzye $query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $row= $sth->fetchrow_arrayref();
+ $analyze= $row->[0];
+ $sth=0;
+
+ # Fetch the timings
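+ # ANALYZE FORMAT=JSON reports one "r_table_time_ms" value per table, so
+ # summing them gives the total table access time of the query.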
+ $j=0;
+ while ($analyze =~ /r_table_time_ms": ([0-9.]*)/g)
+ {
+ $tot_ms= $tot_ms+ $1;
+ $j++;
+ }
+ if ($j > 2)
+ {
+ die "Found too many tables, program needs to be extended!"
+ }
+ # Add the time spent filling rowid filters (if any)
+ while ($analyze =~ /r_filling_time_ms": ([0-9.]*)/g)
+ {
+ $tot_ms= $tot_ms+ $1;
+ }
+ }
+ }
+ else
+ {
+ my $local_table= substr($table,index($table,".")+1);
+ for ($i=0 ; $i < $opt_test_runs ; $i++)
+ {
+ my ($j);
+ $sth= $dbh->prepare("explain analyze $query" ) || die "Got error on 'analzye $query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $row= $sth->fetchrow_arrayref();
+ $analyze= $row->[0];
+ $sth=0;
+ }
+ # Fetch the timings
+ $j=0;
+
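+ # EXPLAIN ANALYZE prints "actual time=<first row>..<all rows>" together
+ # with "loops=<N>"; take the second time (all rows) and multiply it by the
+ # number of loops to get the total table access time.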
+ if ($analyze =~ / $local_table .*actual time=([0-9.]*) .*loops=([0-9]*)/g)
+ {
+ my $times= $1;
+ my $loops= $2;
+ $times =~ /\.\.([0-9.]*)/;
+ $times= $1;
+ $times="0.005" if ($times == 0);
+ #print "time: $times \$1: $1 loops: $loops\n";
+ $tot_ms= $tot_ms+ $times*$loops;
+ $j++;
+ }
+ if ($j > 1)
+ {
+ die "Found too many tables, program needs to be extended!"
+ }
+ }
+
+ if ($found_two_tables)
+ {
+ # Add the cost of the WHERE clause for both tables. The last table
+ # is assumed to have $expected_rows rows while the first (driving) table
+ # may have fewer rows. Take that into account when calculating the
+ # total where cost.
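+ # Example (hypothetical numbers): with $opt_rows = 1000 and a driving table
+ # that EXPLAIN estimates at 100 rows, the total where cost becomes
+ # $local_where_cost * (1 + 100/1000) = 1.1 * $local_where_cost.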
+ $local_where_cost= ($local_where_cost +
+ $local_where_cost *
+ ($optimizer_rows_first/$opt_rows));
+ }
+ $ms= $tot_ms/$opt_test_runs;
+
+ if ($opt_print_analyze)
+ {
+ print "\nanalyze:\n" . $analyze . "\n\n";
+ }
+
+ if (!defined($opt_grof))
+ {
+ # Get last query cost
+ $query= "show status like 'last_query_cost'";
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $row= $sth->fetchrow_arrayref();
+ $sth=0;
+ $cost= $row->[1] * $adjust_cost;
+
+ printf "%10s time: %10.10s ms cost-where: %6.4f cost: %6.4f",
+ $name, $ms, $cost - $local_where_cost, $cost;
+ if ($adjust_cost != 1.0)
+ {
+ printf " (cost was %6.4f)", $row->[1];
+ }
+ }
+ else
+ {
+ printf "%10s time: %10.10s ms", $name, $ms;
+ $cost= 0; $local_where_cost= 0;
+ }
+ print "\n\n";
+
+ $res{'cost'}= $cost;
+ $res{'where_cost'}= $local_where_cost;
+ $res{'time'}= $ms;
+ return \%res;
+}
+
+
+sub do_query()
+{
+ my ($query)= @_;
+ $dbh->do($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+}
+
+
+sub print_totals()
+{
+ my ($i, $j);
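+ # If the cost constants are well calibrated, the cost/time ratio printed
+ # below should be roughly the same for every engine and test.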
+ print "Totals per test\n";
+ for ($j= $where_tests+1 ; $j <= $#test_names; $j++)
+ {
+ print "$test_names[$j]:\n";
+ for ($i= 0 ; $i <= $#engines ; $i++)
+ {
+ if ($res[$i][$j])
+ {
+ my $cost= $res[$i][$j]->{'cost'} - $res[$i][$j]->{'where_cost'};
+ my $ms= $res[$i][$j]->{'time'};
+ printf "%-8s %10.4f ms cost: %10.4f cost/time: %8.4f\n",
+ $engines[$i], $ms, $cost, $cost/$ms;
+ }
+ }
+ }
+}
+
+
+# This function can be used to test things with gprof
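+# gprof only writes its profile data when the profiled process exits, which
+# is why the server is shut down at the end of the run (this assumes the
+# server was built with -pg).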
+
+sub test_with_gprof()
+{
+ my ($function_ref, $loops)= @_;
+ my ($sum, $i, $cost);
+
+ printf "Running test $function_ref $loops time\n";
+ $sum= 0; $loops=10;
+ for ($i=0 ; $i < $loops ; $i++)
+ {
+ $cost= $function_ref->();
+ $sum+= $cost->{'time'};
+ }
+ print "Average: " . ($sum/$loops) . "\n";
+ print "Shuting down server\n";
+ $dbh->do("shutdown") || die "Got error ..";
+}
+
+##############################################################################
+# Get various simple data from MariaDB
+##############################################################################
+
+sub print_mariadb_version()
+{
+ my ($query, $sth, $row);
+ $query= "select VERSION()";
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $row= $sth->fetchrow_arrayref();
+ print "Server: $row->[0]";
+
+ $query= "show variables like 'VERSION_SOURCE_REVISION'";
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $row= $sth->fetchrow_arrayref();
+ print " Commit: $row->[1]\n";
+}
+
+
+sub get_row_count()
+{
+ $query= "select count(*) from $table";
+ $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
+ if (!$sth->execute)
+ {
+ if (!($dbh->errstr =~ /doesn.*exist/))
+ {
+ die "Got error on '$query': " . $dbh->errstr . "\n";
+ }
+ return 0;
+ }
+ $row= $sth->fetchrow_arrayref();
+ return $row->[0];
+}
+
+
+sub get_variable()
+{
+ my ($name)= @_;
+ $query= "select @@" . $name;
+ if (!($sth= $dbh->prepare($query)))
+ {
+ die "Got error on '$query': " . $dbh->errstr . "\n";
+ }
+ $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
+ $row= $sth->fetchrow_arrayref();
+ return $row->[0];
+}