diff options
110 files changed, 5180 insertions, 1510 deletions
diff --git a/Docs/optimizer_costs.txt b/Docs/optimizer_costs.txt new file mode 100644 index 00000000000..052c8e3e72a --- /dev/null +++ b/Docs/optimizer_costs.txt @@ -0,0 +1,1160 @@ +This file is intended to explain some of the optimizer cost variables +in MariaDB 10.11. + +Background +========== + +Most timings have come from running: + +./check_costs.pl --rows=1000000 --socket=/tmp/mysql-dbug.sock --comment="--aria-pagecache-buffer-size=10G --innodb-buffer_pool_size=10G --key_buffer-size=1G --max-heap-table-size=10G" + +The MariaDB server is started with the options: +--aria-pagecache-buffer-size=10G --innodb-buffer_pool_size=10G --key_buffer-size=1G --max-heap-table-size=10G" + +- All costs are changed to be milliseconds for engine operations and + other calculations, like the WHERE clause. This is a big change from + before the patch that added this file where the basic cost was a + disk seek and one index read and we assumed they had the same cost. +- I am using Aria as the 'base' cost. This is because it caches all data, + which most other engines also would do. +- MyISAM cannot be used as 'base' as it does not cache row data (which gives + a high overhead when doing row lookups). +- Heap is in memory and a bit too special (no caching). +- InnoDB is a clustered engine where secondary indexes have to use + the clustered index to find a row (not a common case among storage engines). + +The old assumption in the optimizer has 'always' been that +1 cost = 1 seek = 1 index = 1 row lookup = 0.10ms. +However 1 seek != 1 index or row lookup and this has not been reflected in +most other costs. +This document is the base of changing things so that 1 cost = 1ms. + + +Setup +===== + +All timings are calculated based on results from this computer: +CPU: Intel(R) Xeon(R) W-2295 CPU @ 3.00GHz +Memory: 256G +Disk: Samsung SSD 860 (not really relevant in this case) +Rows in tests: 1M Each test is run 3 times +(one test to cache the data and 2 runs of which we take the average). 
+ +The assumption is that other computers will have somewhat proportional +timings. The timings are done with all data in memory (except MyISAM rows). +This is reflected in the costs for the test by setting +optimizer_disk_read_ratio=0. + +Note that even on a single Linux computer without any notable tasks +the run time varies a bit from run to run (up to 4%), so the numbers in +this document cannot be repeated exactly but should be good enough for +the optimizer. + +Timings for disk accesses on other systems can be changed by setting +optimizer_disk_read_cost (usec / 4092 bytes) to match the read speed. + +Default values for check_costs.pl: +optimizer_disk_read_ratio= 0 Everything is cached +SCAN_LOOKUP_COST=1 Cost modifier for scan (for end user) +set @@optimizer_switch='index_condition_pushdown=off'"; + + +ROW_COPY_COST and KEY_COPY_COST +=============================== + +Regarding ROW_COPY_COST: +When calculating the cost of fetching a row, we have two alternative cost +parts (in addition to other costs): +scanning: rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST) +rnd_pos: rows * (ROW_LOOKUP_COST + ROW_COPY_COST) + +In theory we could remove ROW_COPY_COST and just move the cost +to the two other variables. However, in the future there may be a reason +to be able to modify row_copy_cost per table depending on number and type +of fields (A table of 1000 fields should have a higher row copy cost than +a table with 1 field). Because of this, I prefer to keep ROW_COPY_COST +around for now. + +Regarding KEY_COPY_COST: +When calculating the cost of fetching a key we have as part of the cost: +keyread_time: rows * KEY_COPY_COST + ranges * KEY_LOOKUP_COST + + (rows-ranges) * KEY_NEXT_FIND_COST +key_scan_time: rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST) + +We could remove KEY_COPY_COST by adding it to KEY_LOOKUP_COST and +KEY_NEXT_FIND_COST but I prefer to keep it with the same argument as +for ROW_COPY_COST. 
+ +The relation between KEY_COPY_COST / (KEY_NEXT_FIND_COST + KEY_COPY_COST) +is assumed to be 0.1577 (See the analysis in the appendix) + +There is a relationship between the above costs in that for a clustered +index the cost is calculated as ha_keyread_time() + ROW_COPY_COST. + + +Preamble +========= + +I tried first to use performance schema to get costs, but I was not +successful as all timings I got for tables showed the total time +executing the statement, not the timing for doing the actual reads. +Also the overhead of performance schema affected the results. + +With --performance-schema=on + +MariaDB [test]> select sum(1) from seq_1_to_100000000; ++-----------+ +| sum(1) | ++-----------+ +| 100000000 | ++-----------+ +1 row in set (4.950 sec) + +Performance schema overhead: 30.1% + +With: +UPDATE performance_schema.setup_consumers SET ENABLED = 'YES'; +UPDATE performance_schema.setup_instruments SET ENABLED = 'YES', TIMED = 'YES'; + +Flush with: +CALL sys.ps_truncate_all_tables(FALSE); + +Performance schema overhead now: 32.9% + +Timings from: +select * from events_statements_current where thread_id=80; + +MariaDB [test]> select 885402302809000-884884140290000; ++---------------------------------+ +| 885402302809000-884884140290000 | ++---------------------------------+ +| 518162519000 | ++---------------------------------+ +-> Need to divide by 1000000000000.0 to get seconds + +As seen above, the above gives the total statement time not the time +spent to access the tables. 
+ +In the end, I decided to use analyze to find out the cost of the table +actions: + +For example: Finding out table scan timing (and thus costs): + +analyze format=json select sum(1) from seq_1_to_100000000; +r_table_time_ms": 1189.239022 + + +Calculating 'optimizer_where_cost' +================================== + +To make the WHERE cost reasonable (not too low) we are assuming there are +2 simple conditions in the default 'WHERE clause' + +MariaDB [test]> select benchmark(100000000,l_commitDate >= '2000-01-01' and l_tax >= 0.0) from test.check_costs limit 1; ++--------------------------------------------------------------------+ +| benchmark(100000000,l_commitDate >= '2000-01-01' and l_tax >= 0.0) | ++--------------------------------------------------------------------+ +| 0 | ++--------------------------------------------------------------------+ +1 row in set (3.198 sec) + +Time of where in seconds: 3.198 / 100000000 (100,000,000) + +Verification: + +select sum(1) from seq_1_to_100000000 where seq>=0.0 and seq>=-1.0; ++-----------+ +| sum(1) | ++-----------+ +| 100000000 | ++-----------+ +1 row in set (8.564 sec) + +MariaDB [test]> select sum(1) from seq_1_to_100000000; ++-----------+ +| sum(1) | ++-----------+ +| 100000000 | ++-----------+ +1 row in set (5.162 sec) + +Time of where= (8.564-5.162)/100000000 = 3.402/100000000 (100,000,000) +(Result good enough, as slightly different computations) + +check_costs.pl provides the numbers when using heap tables and 1M rows: + +simple where: 118.689 ms +complex where: 138.474 ms +no where: 83.699 ms + +Which gives for simple where: +(118.689-83.699)/1000 = 0.034990000000000007 ms +Which is in the same ballpark. + +We use the result from the select benchmark run as this has least overhead +and is easiest to repeat and verify in a test. +Which gives: +optimizer_where_cost= 0.032 ms / WHERE. 
+ + +HEAP TABLE SCAN & ROW_COPY_COST +=============================== + +We start with heap as all rows are in memory and we don't have to take +disk reads into account. + +select sum(l_partkey) from test.check_costs +table_scan ms: 10.02078736 +rows: 1000000 + +Cost should be 10.02078736 (scan cost) + 32 (where cost) + +cost= scan_time() * optimizer_cache_cost * SCAN_LOOKUP_COST + + TABLE_SCAN_SETUP_COST + + records * (ROW_COPY_COST + ROW_LOOKUP_COST + WHERE_COMPARE_COST); + +=> +We are ignoring TABLE_SCAN_SETUP (which is just to prefer index lookup on small +tables). +We can also ignore records * WHERE_COMPARE_COST as we don't have that +in the above calcuated 'ms'. +row_costs= (ROW_COPY_COST + ROW_LOOKUP_COST) + +cost= scan_time() * 1 * 1 + + 1000000.0 * (row_costs) +=> +cost= time_per_row*1000000 + row_costs * 1000000; +=> +time_per_row+row_cost= cost/1000000 + +Let's assume that for heap, finding the next row is 80 % of the time and +copying the row (a memcmp) to upper level is then 20 %. +(This is not really important, we could put everthing in heap_scan_time, +but it's good to have split the data as it gives us more options to +experiment later). + +row_lookup_cost= 10.02078736/1000000*0.8 = 8.0166298880000005e-06 +row_copy_cost= 10.02078736/1000000*0.2 = 2.0041574720000001e-06 + +Conclusion: +heap_scan_time= 8.0166e-06 +row_copy_cost= 2.0042e-06 + +Heap doesn't support key only read, so key_copy_cost is not relevant for it. + + +HEAP INDEX SCAN +=============== + +select count(*) from test.check_costs_heap force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0 +index_scan time: 79.7286117 ms + +Index scan on heap tables can only happen with binary trees. +l_supp_key is using a binary tree. 
+ +cost= (ranges + rows + 1) * BTREE_KEY_NEXT_FIND_COST + rows * row_copy_cost= +(for large number of rows): +rows * (BTREE_KEY_NEXT_FIND_COST + row_copy_cost) + +BTREE_KEY_NEXT_FIND_COST= cost/rows - row_copy_cost = +79.7286117/1000000- 2.334e-06= 0.0000773946117 + + +HEAP EQ_REF +=========== + +select straight_join count(*) from seq_1_to_1000000,test.check_costs_heap where seq=l_linenumber +eq_ref_index_join time: 175.874165 of which 12.57 is from seq_1_to_1000000 + +Note: This is 34% of the cost of an Aria table with index lookup and + 20% of an Aria table with full key+row lookup. + +cost= rows * (key_lookup_cost + row_copy_cost) +key_lookup_cost= cost/rows - row_copy_cost = +(175.874165-12.57)/1000000 - 2.334e-06 = 0.00016097016500000002 + + +HEAP EQ_REF on binary tree index +================================ + +select straight_join count(*) from seq_1_to_1000000,test.check_costs_heap where seq=l_extra and l_partkey >= 0 +eq_ref_join time: 241.350539 ms of which 12.57 is from seq_1_to_1000000 + +rows * (tree_find_cost() + row_copy_cost) = + +tree_find_cost()= cost/rows - row_copy_cost = + +(241.350539-12.57)/1000000 - 2.334e-06= 0.000226446539 + +tree_find_cost() is defined as key_compare_cost * log2(table_rows) +-> +key_compare_cost= 0.000226446539/log2(1000000) = 0.000011361200108882259; + + +SEQUENCE SCAN +============= + +analyze format=json select sum(seq+1) from seq_1_to_1000000; +r_table_time_ms: 12.47830611 + +Note that for sequence, index and table scan are the same thing. +We need to have a row_copy/key_copy cost as this is used when doing +a key lookup for sequence. Setting these to 50% of the full cost +should be sufficient for now. 
+ +Calculation sequence_scan_cost: + +When ignoring reading from this, the cost of table scan is: +rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST) + +The cost of key scan is: +ranges * KEY_LOOKUP_COST + (rows - ranges) * KEY_NEXT_FIND_COST + +rows * KEY_COPY_COST; + +As there is no search after first key for sequence, we can set +KEY_LOOKUP_COST = KEY_NEXT_FIND_COST. + +This gives us: + +r_table_time_ms = (ROW_NEXT_FIND_COST + ROW_COPY_COST) = + (KEY_NEXT_FIND_COST + KEY_COPY_COST) * 1000000; + +-> +ROW_NEXT_FIND_COST= ROW_COPY_COST = KEY_LOOKUP_COST + KEY_COPY_COST= +12.47830611/1000000/2 = 0.0000062391530550 + + +HEAP KEY LOOKUP +=============== + +We can use this code to find the timings of a index read in a table: + +analyze format=json select straight_join count(*) from seq_1_to_1000000,check_costs where seq=l_orderkey + +"query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": 420.5083447, + "table": { + "table_name": "seq_1_to_1000000", + "access_type": "index", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "8", + "used_key_parts": ["seq"], + "r_loops": 1, + "rows": 1000000, + "r_rows": 1000000, + "r_table_time_ms": 12.47830611, + "r_other_time_ms": 44.0671283, + "filtered": 100, + "r_filtered": 100, + "using_index": true + }, + "table": { + "table_name": "check_costs", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["l_orderkey"], + "ref": ["test.seq_1_to_1000000.seq"], + "r_loops": 1000000, + "rows": 1, + "r_rows": 1, + "r_table_time_ms": 160 + "filtered": 100, + "r_filtered": 100, + "attached_condition": "seq_1_to_1000000.seq = check_costs.l_orderkey" + } + } + +This gives the time for a key lookup on hash key as: +160/10000000 - row_copy_cost = +160/1000000.0 - 2.0042e-06 = 0.00015799580000000002 + + +ARIA TABLE SCAN +=============== +(page format, all rows are cached) + +table_scan ms: 107.315698 + +Cost is calculated as: + +blocks= 
stats.data_file_length / stats.block_size) = 122888192/4096= 30002 +engine_blocks (8192 is block size in Aria) = 15001 + +cost= blocks * avg_io_cost() * + optimizer_cache_cost * SCAN_LOOKUP_COST + + engine_blocks * INDEX_BLOCK_COPY_COST + + TABLE_SCAN_SETUP_COST + + records * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); + +When all is in memory (optimizer_cache_cost= 0) we get: + +cost= blocks * INDEX_BLOCK_COPY_COST + + TABLE_SCAN_SETUP_COST + + records * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); + +To calculate INDEX_BLOCK_COPY_COST I added a temporary tracker in +ma_pagecache.cc::pagecache_read() and did run the same query. +I got the following data: +{counter = 17755, sum = 1890559} +Which give me the time for copying a block to: +1000.0*1890559/sys_timer_info.cycles.frequency/17755 = 3.558138826971332e-05 ms +And thus INDEX_BLOCK_COPY_COST= 0.035600 + +Replacing known constants (and ignore TABLE_SCAN_SETUP_COST): +cost= 107.315698 = 15001 * 3.56e-5 + 1000000 * aria_row_copy_costs; + +aria_row_copy_costs= (107.315698 - (15001 * 3.56e-5))/1000000 = +0.0001067816624 + +As ROW_COPY_COST/ROW_NEXT_FIND_COST= 0.57 (See appendex) + +ROW_COPY_COST= 0.0001067816624 * 0.57 = 0.000060865547560 +ROW_NEXT_FIND_COST= 0.0001067816624 * 0.43 = 0.000045916114832 + + +Aria, INDEX SCAN +================ + +Finding out cost of reading X keys from an index (no row lookup) in Aria. 
+ +Query: select count(*) from test.check_costs_aria force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0 +Table access time: ms: 98.1427158 + +blocks= index_size/IO_SIZE = +(rows * tot_key_length / INDEX_BLOCK_FILL_FACTOR) / IO_SIZE +-> +1000000 * 19 / 0.75/ 4096 = 6184 +engine_blocks (block_size 8192) = 6184/2 = 3092 +(Range optimzer had calculated 3085) + +keyread_time= blocks * avg_io_cost() * cache + engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST); += engine_blocks * INDEX_BLOCK_COPY_COST + rows * KEY_NEXT_FIND_COST= + 3092 * 3.56e-05 + 1000000 * (KEY_NEXT_FIND_COST + KEY_COPY_COST) +-> +KEY_NEXT_FIND_COST + KEY_COPY_COST= (98.1427158 - 3092 * 3.56e-05)/1000000 = +0.0000980326406; + +KEY_COPY_COST= 0.0000980326406 * 0.16 = 0.000015685222496 +KEY_NEXT_FIND_COST= 0.0000980326406 * 0.84 = 0.000082347418104 + + +Aria, RANGE SCAN (scan index, fetch a row for each index entry) +=============================================================== + +Query: +select sum(l_orderkey) from test.check_costs_aria force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 +range_scan ms: 309.7620909 + +cost= keyread_time + rnd_pos_time. 
+keyread_time is as above in index scan, but whithout KEY_COPY_COST: +keyread_time= 98.1427158 - KEY_COPY_COST * 1000000= +98.1427158 - 0.000015685222496 * 1000000= 82.457493304000000; +rnd_pos_time= 309.7620909 - 82.457493304000000 = 227.304597596000000 + +rnd_pos_time() = io_cost + engine_mem_cost + + rows * (ROW_LOOKUP_COST + ROW_COPY_COST) = +rows * avg_io_cost() * engine_block_size/IO_SIZE + +rows * INDEX_BLOCK_COPY_COST + +rows * (ROW_COPY_COST + ROW_LOOKUP_COST) += (When rows are in memory) +rows * INDEX_BLOCK_COPY_COST + +rows * (ROW_COPY_COST + ROW_LOOKUP_COST) + +This gives us: +227.304597596000000 = 1000000 * 3.56e-05 + 1000000*(0.000060865547560 + ROW_LOOKUP_COST) +-> +ROW_LOOKUP_COST= (227.304597596000000 - 1000000 * 3.56e-05 - 1000000*0.000060865547560) / 1000000 = 0.0001308390500 + + +Aria, EQ_REF with index_read +============================ + +select straight_join count(*) from seq_1_to_1000000,test.check_costs_aria where seq=l_linenumber +eq_ref_index_join 499.631749 ms + +According to analyze statement: + +- Cost for SELECT * from seq_1_to_1000000: 12.57 + (From Last_query_cost after the above costs has been applied) +- Time from check_costs: eq_ref's: 499.631749- 12.57s = 487.061749 + +cost= rows * (keyread_time(1,1) + KEY_COPY_COST) + +keyread_time(1,1)= INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST; + +cost= rows * (KEY_COPY_COST + INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST) +-> +KEY_LOOKUP_COST= cost/rows - 0.000015685222496 - 0.000035600 +KEY_LOOKUP_COST= 487.061749 / 1000000 - 0.000035600 - 0.000015685222496 +KEY_LOOKUP_COST= 0.000435776526504 + + +MyISAM, TABLE SCAN +================== + +select sum(l_partkey) from test.check_costs_myisam +table_scan ms: 126.353364 + +check_costs.MYD: 109199788 = 26660 IO_SIZE blocks +The row format for MyISAM is similar to Aria, so we use the same +ROW_COPY_COST for Aria. 
+ +cost= blocks * avg_io_cost() * + optimizer_cache_cost * SCAN_LOOKUP_COST + + engine_blocks * INDEX_BLOCK_COPY_COST + + TABLE_SCAN_SETUP_COST + + rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); + +MyISAM is using the file system as a row cache. +Let's put the cost of accessing the row in ROW_NEXT_FIND_COST. +Everything is cached (by the file system) and optimizer_cache_cost= 0; + +cost= engine_blocks * INDEX_BLOCK_COPY_COST + + TABLE_SCAN_SETUP_COST + + rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)) + +ROW_NEXT_FIND_COST= +(costs - engine_blocks * ROW_BLOCK_COPY_COST - TABLE_SCAN_SETUP_COST)/rows - +ROW_COPY_COST += +(126.353364 - 26660 * 3.56e-05 - 1)/1000000 - 0.000060865547560 +ROW_NEXT_FIND_COST= 0.00006353872044 + + +MyISAM INDEX SCAN +================= + +select count(*) from test.check_costs_myisam force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0; +index_scan ms: 106.490584 + +blocks= index_size/IO_SIZE = +(rows * tot_key_length / INDEX_BLOCK_FILL_FACTOR) / IO_SIZE +-> +1000000 * 19 / 0.75/ 4096 = 6184 +As MyISAM has a block size of 4096 for this table, engine_blocks= 6184 + +cost= keyread_time= blocks * avg_io_cost() * cache + engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST); +-> +cost= engine_blocks * INDEX_BLOCK_COPY_COST + rows * KEY_NEXT_FIND_COST + +Assuming INDEX_BLOCK_COPY_COST is same as in Aria and the code for +key_copy is identical to Aria: +cost= 6184 * 3.56e-05 + 1000000 * (KEY_NEXT_FIND_COST + KEY_COPY_COST) +-> +KEY_NEXT_FIND_COST= (106.490584 - 6184 * 3.56e-05)/1000000 - 0.000015685222496= +0.000090585211104 + + +MyISAM, RANGE SCAN (scan index, fetch a row for each index entry) +================================================================= + +select sum(l_orderkey) from test.check_costs_myisam force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0 +time: 1202.0894 ms + +cost= keyread_time + rnd_pos_time. 
+keyread_time is as above in MyISAM INDEX SCAN, but without KEY_COPY_COST: +keyread_time= 106.490584 - KEY_COPY_COST * 1000000= +106.490584 - 0.000015685222496 * 1000000= 90.805361504000000; +rnd_pos_time= 1202.0894 - 90.805361504000000 = 1111.284038496000000 + +rnd_pos_time() = io_cost + engine_mem_cost + + rows * (ROW_LOOKUP_COST + ROW_COPY_COST) = +rows * avg_io_cost() * engine_block_size/IO_SIZE + +rows * INDEX_BLOCK_COPY_COST + +rows * (ROW_COPY_COST + ROW_LOOKUP_COST) += (When rows are in memory) +rows * INDEX_BLOCK_COPY_COST + +rows * (ROW_COPY_COST + ROW_LOOKUP_COST) + +This gives us: + 1111.284038496000000 = 1000000 * 3.56e-05 + 1000000*(0.000060865547560 + ROW_LOOKUP_COST) +-> +ROW_LOOKUP_COST= ( 1111.284038496000000 - 1000000 * (3.56e-05 + 0.000060865547560)) / 1000000 +-> +ROW_LOOKUP_COST= 0.001014818490936 + +As the row is never cached, we have to ensure that rnd_pos_time() +doesn't include an io cost (which would be affected by +optimizer_cache_hit_ratio). This is done by having a special +ha_myisam::rnd_pos_time() that doesn't include io cost but instead an +extra cpu cost. 
+ + +MyISAM, EQ_REF with index_read +============================== + +select straight_join count(*) from seq_1_to_1000000,test.check_costs_myisam where seq=l_linenumber; +eq_ref_join ms: 613.906777 of which 12.48 ms is for seq_1_to_1000000; + +According to analyze statement: + +- Cost for SELECT * from seq_1_to_1000000: 12.48 (See sequence_scan_cost) +- Time from check_costs: eq_ref's: 613.906777- 12.48 = 601.426777; + +cost= rows * (keyread_time(1) + KEY_COPY_COST) + +keyread_time(1)= INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST; + +cost= rows * (KEY_COPY_COST + INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST) +-> +KEY_LOOKUP_COST= cost/rows - INDEX_BLOCK_COPY_COST - KEY_COPY_COST; +601.426777 / 1000000 - 3.56e-05 - 0.000015685222496 = 0.00055014155451 +KEY_LOOKUP_COST= 0.00055014155451 + + + +InnoDB, TABLE SCAN +================== + +select sum(l_quantity) from check_costs_innodb; +table_scan 131.302492 +Note that InnoDB reported only 956356 rows instead of 1000000 in stats.records +This will cause the optimizer to calculate the costs based on wrong +assumptions. + +As InnoDB has a clustered index (whose cost is a combination of +KEY_LOOKUP_COST + ROW_COPY_COST), we have to ensure that the +relationship between KEY_COPY_COST and ROW_COPY_COST is close to the +real time of copying a key and a row. + +I assume, for now, that the row format for InnoDB is not that +different than for Aria (in other words, computation to unpack is +about the same), so let's use the same ROW_COPY_COST (0.000060865547560) + +I am ignoring the fact that InnoDB can optimize row copying by only +copying the used fields as the optimizer currently cannot take that +into account. (This would require a way to update ROW_COPY_COST / +table instance in the query). + +For now, let's also use the same value as Aria for +INDEX_BLOCK_COPY_COST (3.56e-05). 
+ +The number of IO_SIZE blocks in the InnoDB data file is 34728 (from gdb)) +(For reference, MyISAM was using 26660 and Aria 30002 blocks) +As InnoDB is using 16K blocks, the number of engine blocks= 34728/4= 8682 + +cost= blocks * avg_io_cost() * + optimizer_cache_cost * SCAN_LOOKUP_COST + + engine_blocks * INDEX_BLOCK_COPY_COST + + TABLE_SCAN_SETUP_COST + + rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); + +as optimizer_cache_cost = 0 + +cost= engine_blocks * INDEX_BLOCK_COPY_COST + + TABLE_SCAN_SETUP_COST + + rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)) + +ROW_NEXT_FIND_COST= +(costs - engine_blocks * ROW_BLOCK_COPY_COST - TABLE_SCAN_SETUP_COST)/rows - +ROW_COPY_COST += (Ignoring TABLE_SCAN_SETUP_COST, which is just 10 usec) +(131.302492 - 8682 * 3.56e-05)/1000000 - 0.000060865547560 = +0.00007012786523999997 + + +InnoDB INDEX SCAN +================= + +select count(*) from check_costs_innodb force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0; +index_scan 114.733037 ms +Note that InnoDB is reporting 988768 rows instead of 1000000 +(The number varies a bit between runs. At another run I got 956356 rows) +With default costs (as of above), we get a query cost of 112.142. This can +still be improved a bit... 
+ +blocks= index_size/IO_SIZE = +(rows * tot_key_length / INDEX_BLOCK_FILL_FACTOR) / IO_SIZE +-> (total_key_length is 17 in InnoDB, 19 in Aria) +1000000 * 17 / 0.75/ 4096 = 5533 +engine_blocks= 5533/4 = 1383 + +(In reality we get 5293 blocks and 1323 engine blocks, because of the +difference in InnoDB row count) + +cost= keyread_time= blocks * avg_io_cost() * cache + engine_blocks * INDEX_BLOCK_COPY_COST + rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST); +-> +cost= engine_blocks * INDEX_BLOCK_COPY_COST + rows * KEY_NEXT_FIND_COST + +Assuming INDEX_BLOCK_COPY_COST is same as in Aria: +(Should probably be a bit higher as block_size in InnoDB is 16384 +compared to 8192 in Aria) + +cost= 1383 * 3.56e-05 + 1000000 * (KEY_NEXT_FIND_COST + KEY_COPY_COST) += +KEY_NEXT_FIND_COST + KEY_COPY_COST= (114.733037 - 1383 * 3.56e-05)/1000000 += +KEY_NEXT_FIND_COST= (114.733037 - 1383 * 3.56e-05)/1000000 - 0.000015685222496 +-> +KEY_NEXT_FIND_COST=0.000098998579704; + +Setting this makes InnoDB calculate the cost to 113.077711 (With estimate of +988768 rows) +If we would have the right number of rows in ha_key_scan_time, we would +have got a cost of: + +Last_query_cost: 145.077711 (Including WHERE cost for 988768 row) +(145.077711)/988768*1000000.0-32 = 114.72573444933 + + +InnoDB RANGE SCAN +================= + +select sum(l_orderkey) from check_costs_innodb force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0 +range_scan 961.4857045 ms +Note that InnoDB was reporting 495340 rows instead of 1000000 ! +I added a patch to fix this and now InnoDB reports 990144 rows + +cost= keyread_time + rnd_pos_time. 
+keyread_time is as above in index scan, but we want it without KEY_COPY_COST: +keyread_time= cost - KEY_COPY_COST * 1000000= +114.733037 - 0.000015685222496 * 1000000= 99.047814504000000 +rnd_pos_time= 961.4857045 - 99.047814504000000 = 862.437889996000000 + +rnd_pos_time() = io_cost + engine_mem_cost + + rows * (ROW_LOOKUP_COST + ROW_COPY_COST) = +rows * avg_io_cost() * engine_block_size/IO_SIZE + +rows * INDEX_BLOCK_COPY_COST + +rows * (ROW_COPY_COST + ROW_LOOKUP_COST) += (When rows are in memory) + +rows * (INDEX_BLOCK_COPY_COST + ROW_COPY_COST + ROW_LOOKUP_COST) + +This gives us: +862.437889996000000 = 1000000 * 3.56e-05 + 1000000*(0.000060865547560 + ROW_LOOKUP_COST) +-> +ROW_LOOKUP_COST= (862.437889996000000 - 1000000*(3.56e-05+0.000060865547560)) / 1000000 +-> +ROW_LOOKUP_COST= 0.000765972342436 + +Setting this makes InnoDB calculate the cost to 961.081050 (good enough) + + +InnodDB EQ_REF with index_read +============================== + +select straight_join count(*) from seq_1_to_1000000,test.check_costs_innodb where seq=l_linenumber +time: 854.980610 ms + +Here the engine first has to do a key lookup and copy the key to the upper +level (Index only read). 
+ +According to analyze statement: + +- Cost for SELECT * from seq_1_to_1000000: 12.57 (See sequence_scan_cost) +- Time from check_costs: eq_ref_join: 854.980610 + This is time for accessing both seq_1_to_1000000 and check_costs + time for check_cost_innodb: 854.980610-12.57 = 842.410610 ms + +cost= rows * (keyread_time(1,1) + KEY_COPY_COST) + +keyread_time(1,1)= INDEX_BLOCK_COPY_COST + ranges * KEY_LOOKUP_COST + + (rows-ranges) * KEY_NEXT_FIND_COST + +As rows=1 and ranges=1: + +keyread_time(1,1)= INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST + +cost= rows * (KEY_COPY_COST + INDEX_BLOCK_COPY_COST + KEY_LOOKUP_COST) +-> +KEY_LOOKUP_COST= cost/rows - INDEX_BLOCK_COPY_COST - KEY_COPY_COST; +842.410610 / 1000000 - 3.56e-05 - 0.000015685222496 +-> +KEY_LOOKUP_COST= 0.000791125387504; + +After the above we have +last_query_cost=918.986438; + +The cost for check_costs_innodb = +last_query_cost - sequence_scan_cost - where_cost*2 = +918.986438 - 12.57 - 32*2 = 842.416438 (ok) + + +InnodDB EQ_REF with clustered index read +======================================== + +select straight_join count(*) from seq_1_to_1000000,check_costs_innodb where seq=l_orderkey +eq_ref_cluster_join time: 972.290773 ms + +According to analyze statement: +- Cost for SELECT * from seq_1_to_1000000: 12.57 (See sequence_scan_cost) +- Time from check_costs: eq_ref_cluster_join: 972.290773 ms + This is time for accessing both seq_1_to_1000000 and check_costs_innodb. 
+ Time for check_cost_innodb: 972.290773 - 12.57 = 959.790773 + +The estimated cost is 875.0160 + +cost= rows * (keyread_time(1,1) + + ranges * ROW_LOOKUP_COST + + (rows - ranges) * ROW_NEXT_FIND_COST + + rows * ROW_COPY_COST) + +As rows=1 and ranges=1: + +cost= rows * (INDEX_BLOCK_COPY_COST + ROW_LOOKUP_COST + ROW_COPY_COST); +-> +ROW_LOOKUP_COST= cost/rows - INDEX_BLOCK_COPY_COST - ROW_COPY_COST; +959.790773 / 1000000 - 3.56e-05 - 0.000060865547560 +-> +ROW_LOOKUP_COST= 0.0008633252254400001 + +From InnoDB RANGE SCAN we have ROW_LOOKUP_COST=0.000765972342436 +From EQ_REF with index read we have KEY_LOOKUP_COST= 0.000791125387504, +which should in theory be identical to ROW_LOOKUP_COST, + +For now we have to live with the difference (as I want to have the project done +for the next release). + +The difference could be come from the following things: + +- InnoDB estimation of rows in the range scan test is a bit off. +- Maybe the work to find a row from an internal key entry compared to + a external key is a bit difference (less checking/conversions) +- There is different keys used for range scan and this test that could have + different costs +- Maybe we should increase ROW_COPY_COST or ROW_LOOKUP_COST for InnoDB + and adjust other costs. + + +Some background. 
In range scan, the cost is: +- Scanning over all keys + - For each key, fetch row using rowid + +For the EQ_REF cache +- Scan seq_1_to_1000000 + for each value in seq + do an index_read() call + + +Archive scan cost +================= + +table_scan time: 757.390280 ms +rows: 1000000 +file size: 32260650 = 7878 IO_SIZE blocks + +cost= scan_time() + TABLE_SCAN_SETUP_COST + + records * (ROW_COPY_COST + ROW_LOOKUP_COST + WHERE_COMPARE_COST); + +757.390280 = scan_time() + 10 + 1000000 * (0.060866+0.032000) +-> +scan_time()= 757.390280 - (10 + 1000000 * (0.060866+0.032000)/1000) = 654.52428 + +scan_time() is defined as: + +cost.cpu= (blocks * DISK_READ_COST * DISK_READ_RATIO + + blocks * ARCHIVE_DECOMPRESS_TIME); + +Default values for above: +blocks= 7878 +DISK_READ_COST: 10.240000 usec +DISK_READ_RATIO= 0.20 +-> +ARCHIVE_DECOMPRESS_TIME= (654.52428 - (7878 * 10.240000/1000*0.2)) / 7878 = +0.081034543792841 + + +Future improvements +=================== + +The current costs are quite good for tables of 1M rows (usually about +10% from the true cost for the test table). + +For smaller tables the costs will be a bit on the high side and for +bigger tables a bit on the low side for eq_ref joins (both with index +and with row lookup). + +The only engine that takes into account the number of rows for key lookups +is heap with binary-tree indexes. + +Ideas of how to fix this: + +- Change KEY_LOOKUP_COST, INDEX_BLOCK_COPY_COST and ROW_LOOKUP_COST + (for clustered index) to take into account the height of the B tree. 
+
+
+Appendix
+========
+
+Observations
+============
+
+Ratio between table scan and range scan
+
+Queries used:
+select sum(l_quantity) from check_costs_aria;
+select sum(l_orderkey) from test.check_costs_aria force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0;
+
+The test for Aria shows that cost ratio of range_scan/table_scan are:
+disk_read_ratio=0 341.745207/139.348286= 2.4524536097
+disk_read_ratio=0.02 752.408528/145.748695= 5.1623688843
+disk_read_ratio=0.20 4448.378423/203.352382= 21.8752216190
+
+As we are using disk_read_ratio=0.02 by default, this means that in
+mtr to not use table scan instead of range, we have to ensure that the
+range does not cover more than 1/5 of the total rows.
+
+
+Trying to understand KEY_COPY_COST
+==================================
+
+An index scan with 2 and 4 key parts on an Aria table.
+The index has null key parts, so packed keys are used.
+
+Query1 "index_scan" (2 integer key parts, both key parts may have NULLS):
+select count(*) from $table force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0");
+
+- Optimized build: Average 164 ms/query
+- gprof build: Average 465 ms/query
+
+[16] 51.2 0.00 0.21 3999987 handler::ha_index_next()
+[15] 51.2 0.01 0.20 3999993 maria_rnext [15]
+[22] 19.5 0.08 0.00 9658527 _ma_get_pack_key [22]
+
+This means that for 3999987 read next calls, the time of _ma_get_pack_key
+to retrieve the returned key is:
+0.08 * (3999987/9658527)
+
+The relation of KEY_COPY_COST to KEY_NEXT_FIND_COST is thus for Aria:
+
+0.08 * (3999987/9658527)/0.21 = 0.15777 parts of KEY_NEXT_FIND_COST
+
+------
+
+Query 2 "index_scan_4_parts" (4 integer key parts, 2 parts may have NULL's):
+select count(*) from $table force index (long_suppkey) where l_linenumber >= 0 and l_extra >0");
+
+- Optimized build: 218 ms
+- gprof build: Average 497 ms/query
+
+Most costly functions
+ % cumulative self self total
+ time seconds seconds calls ms/call ms/call name
+ 13.44 0.61 0.61 48292742 0.00 0.00 _ma_get_pack_key
+ 8.59 1.00 0.39 28298101 0.00 0.00 ha_key_cmp
+ 7.27 1.33 0.33 19999951 0.00 0.00 _ma_put_key_in_record
+ 4.41 1.96 0.20 19999952 0.00 0.00 handler::ha_index_next(unsigned char*)
+
+Call graph
+[13] 9.0 0.20 0.21 19999952 handler::ha_index_next(unsigned char*) [13]
+
+[3] 21.6 0.16 0.82 19999960 _ma_search_next [3]
+[18] 7.7 0.02 0.33 19999951 _ma_read_key_record [18]
+ 0.00 0.00 19887291/19999952 _ma_get_static_key [6565][19]
+ 18.4 0.10 0.64 19999936 Item_cond_and::val_int() [19]
+
+-> KEY_COPY_COST = 1.33/1.96 = 0.6785 parts of the index_read_next
+
+Total cost increase from 2 -> 4 key parts = 1.96 / 1.40 = 40%
+This includes the additional work in having more key pages, more work in
+finding next key (if key parts are packed or possibly null), and copying
+the key parts to the record
+
+I also did a quick analyze between using NOT NULL keys, in which case
+Aria can use fixed key lengths. This gives a 39.4% speed up on index
+scan, a small speedup to table scan (as 2 fields cannot have null)
+but not a notable speed up for anything else.
+
+
+Trying to understand ROW_COPY_COST
+==================================
+
+A simple table scan on an Aria table
+
+query: select sum(l_quantity) from check_costs_aria
+
+From gprof running the above query 10 times with 1M rows in the table:
+
+[14] 83.7 0.03 0.76 9999989 handler::ha_rnd_next()
+[17] 51.6 0.49 0.00 10000010 _ma_read_block_record2 [17]
+[18] 21.1 0.01 0.19 156359 pagecache_read [18]
+
+The function that unpacks the row is _ma_read_block_record2()
+
+Taking into account that all pages are cached:
+(Note that the main cost in pagecache_read in this test is calculating the page
+checksum)
+
+ROW_COPY_COST/ROW_NEXT_FIND_COST= 0.49/(0.76+0.3-0.20) = 0.56977 = 0.57
+
+
+Reason for SCAN_SETUP_COSTS
+===========================
+
+One problem with the new more exact cost model is that the optimizer
+starts to use table scans much more for small tables (which is correct when
+one looks at cost). However, small tables are usually cached fully so
+it is still better to use index scan in many cases.
+
+This problem is especially notable in mtr where most test cases use
+tables with very few rows.
+
+TABLE_SCAN_SETUP_COST is used to add a constant startup cost for
+table and index scans. It is by default set to 10 usec, about 10 MyISAM
+row reads.
+ +The following cost calculation shows why this is needed: + +explain select count(*) from t1, t2 where t1.p = t2.i ++------+-------------+-------+-------+---------------+---------+---------+-----------+------+-------------+ +| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | ++------+-------------+-------+-------+---------------+---------+---------+-----------+------+-------------+ +| 1 | SIMPLE | t1 | index | PRIMARY | PRIMARY | 4 | NULL | 2 | Using index | +| 1 | SIMPLE | t2 | ref | k1 | k1 | 5 | test.t1.p | 2 | Using index | ++------+-------------+-------+-------+---------------+---------+---------+-----------+------+-------------+ + +t1 has 2 rows +t2 has 4 rows + +Optimizer trace shows when using TABLE_SCAN_SETUP_COST=0: + +index scan costs +"read_cost": 0.00308962, +read_and_compare_cost": 0.00321762 + +key read costs: +"rows": 2, +"cost": 0.00567934 + +CHOSEN: +Scan with join cache: cost": 0.0038774 +rows_after_scan": 2 + +Note that in the following, we are using cost in microseconds while +the above costs are in milliseconds. 
+
+select * from information_schema.optimizer_costs where engine="myisam"\G
+                          ENGINE: MyISAM
+        OPTIMIZER_DISK_READ_COST: 10.240000
+ OPTIMIZER_INDEX_BLOCK_COPY_COST: 0.035600
+      OPTIMIZER_KEY_COMPARE_COST: 0.008000
+         OPTIMIZER_KEY_COPY_COST: 0.066660
+       OPTIMIZER_KEY_LOOKUP_COST: 0.498540
+    OPTIMIZER_KEY_NEXT_FIND_COST: 0.060210
+       OPTIMIZER_DISK_READ_RATIO: 0.200000
+OPTIMIZER_RND_POS_INTERFACE_COST: 0.000000
+         OPTIMIZER_ROW_COPY_COST: 0.088630
+       OPTIMIZER_ROW_LOOKUP_COST: 0.641150
+    OPTIMIZER_ROW_NEXT_FIND_COST: 0.049510
+    OPTIMIZER_ROWID_COMPARE_COST: 0.004000
+@@OPTIMIZER_SCAN_SETUP_COST 10.000000
+@@OPTIMIZER_WHERE_COST 0.032000
+
+Checking the calculated costs:
+
+index_scan_cost= 10.240000 * 0.2 + 0.035600 + 0.498540 + 4 * (0.060210+0.066660) = 3.08962
+where_cost 0.032000*4= 0.128000
+total: 3.21762
+
+key_read_cost= 10.240000 * 0.2 + 0.035600 + 0.498540 + 0.060210 = 2.64235
+key_copy_cost= 0.066660 * 2 = 0.13332
+where_cost 0.032000*2= 0.06400
+total: 2.64235 + 0.13332 + 0.06400 = 2.8396699999999999
+Needs to be done 2 times (2 rows in t1): 5.67934
+
+Join cache only needs 1 refill. The calculation is done in
+sql_select.cc:best_access_path()
+
+scan_with_join_cache=
+scan_time + cached_combinations * ROW_COPY_COST * JOIN_CACHE_COST +
+row_combinations * (ROW_COPY_COST * JOIN_CACHE_COST + WHERE_COST) =
+3.2176 + 2 * 0.088630 + 2*2 * (0.088630 * 1 + 0.032000) =
+3.87738
+
+Other observations:
+OPTIMIZER_KEY_NEXT_FIND_COST + OPTIMIZER_KEY_COPY_COST + OPTIMIZER_WHERE_COST=
+0.060210 + 0.066660 + 0.032000 = 0.158870
+OPTIMIZER_KEY_LOOKUP_COST / 0.158870 = 3.138
+
+This means that when using index only reads (and DISK_READ_RATIO=0)
+the optimizer will prefer to use 3 times more keys in range or ref
+than doing key lookups!
+If DISK_READ_RATIO is higher, the above ratio increases. This is one of
+the reasons why we set the default value for DISK_READ_RATIO quite low
+(0.02 now)
+
+(OPTIMIZER_ROW_COPY_COST + OPTIMIZER_ROW_NEXT_FIND_COST) /
+(OPTIMIZER_KEY_COPY_COST + OPTIMIZER_KEY_NEXT_FIND_COST) =
+(0.088630 + 0.049510) / (0.066660 + 0.060210) = 1.08831
+Which means that table scans and index scans have almost the same cost.
+select 0.066660
+
+
+HEAP_TEMPTABLE_CREATE_COST
+==========================
+
+I added trackers in create_tmp_table() and open_tmp_table() and ran a
+simple query that creates two materialized temporary tables with a unique
+index 31 times. I got the following tracking information:
+
+(gdb) p open_tracker
+$1 = {counter = 31, cycles = 302422}
+(gdb) p create_tracker
+$2 = {counter = 31, cycles = 1479836}
+
+Cycles per create = (302422 + 1479836)/31= 57492
+
+1000.0*57492/sys_timer_info.cycles.frequency = 0.0249 ms
+HEAP_TMPTABLE_CREATE_COST= 0.025 ms
+
+
+MySQL cost structures
+=====================
+
+MySQL 8.0 server costs are stored in the class Server_cost_constants defined
+in opt_costconstants.h
+
+It contains the following slots and has the following default values:
+
+m_row_evaluate_cost 0.1 Cost for evaluating the query condition on
+ a row
+m_key_compare_cost 0.05 Cost for comparing two keys
+m_memory_temptable_create_cost 1.0 Cost for creating an internal temporary
+ table in memory
+m_memory_temptable_row_cost 0.1 Cost for retrieving or storing a row in an
+ internal temporary table stored in memory.
+m_disk_temptable_create_cost 20.0 Cost for creating an internal temporary
+ table in a disk resident storage engine.
+m_disk_temptable_row_cost 0.5 Cost for retrieving or storing a row in an
+ internal disk resident temporary table.
+ +Engine cost variables: +m_memory_block_read_cost 0.25 The cost of reading a block from a main + memory buffer pool +m_io_block_read_cost 1.0 The cost of reading a block from an + IO device (disk) + +------- + +Some cost functions: + +scan_time() = data_file_length / IO_SIZE + 2; +read_time(index, ranges, rows)= rows2double(ranges + rows); +index_only_read_time()= records / keys_per_block + +table_scan_cost()= scan_time() * page_read_cost(1.0); + +index_scan_cost()= index_only_read_time(index, rows) * + page_read_cost_index(index, 1.0); +read_cost()= read_time() * page_read_cost(1.0); + + +page_read_cost()= buffer_block_read_cost(pages_in_mem) + + io_block_read_cost(pages_on_disk); + +io_block_read_cost()= blocks * m_io_block_read_cost +buffer_block_read_cost()= blocks * m_memory_block_read_cost; + + +There are also: +table_in_memory_estimate() +index_in_memory_estimate() + +If the storage engine is not providing estimates for the above, then +the estimates are done based on table size (not depending on how many +rows are going to be accessed in the table). 
diff --git a/client/mysql.cc b/client/mysql.cc index 0e704f70ecb..015b977e1c3 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -46,7 +46,7 @@ #include <locale.h> #endif -const char *VER= "15.1"; +const char *VER= "15.2"; /* Don't try to make a nice table if the data is too big */ #define MAX_COLUMN_LENGTH 1024 @@ -246,7 +246,7 @@ static my_bool ignore_errors=0,wait_flag=0,quick=0, tty_password= 0, opt_nobeep=0, opt_reconnect=1, opt_secure_auth= 0, default_pager_set= 0, opt_sigint_ignore= 0, - auto_vertical_output= 0, + auto_vertical_output= 0, show_query_cost= 0, show_warnings= 0, executing_query= 0, ignore_spaces= 0, opt_binhex= 0, opt_progress_reports; static my_bool debug_info_flag, debug_check_flag, batch_abort_on_error; @@ -324,6 +324,7 @@ static int com_quit(String *str,char*), com_notee(String *str, char*), com_charset(String *str,char*), com_prompt(String *str, char*), com_delimiter(String *str, char*), com_warnings(String *str, char*), com_nowarnings(String *str, char*); +static int com_query_cost(String *str, char*); #ifdef USE_POPEN static int com_nopager(String *str, char*), com_pager(String *str, char*), @@ -395,6 +396,8 @@ static COMMANDS commands[] = { { "print", 'p', com_print, 0, "Print current command." }, { "prompt", 'R', com_prompt, 1, "Change your mysql prompt."}, { "quit", 'q', com_quit, 0, "Quit mysql." }, + { "costs", 'Q', com_query_cost, 0, + "Toggle showing query costs after each query" }, { "rehash", '#', com_rehash, 0, "Rebuild completion hash." }, { "source", '.', com_source, 1, "Execute an SQL script file. 
Takes a file name as an argument."}, @@ -1156,6 +1159,7 @@ static void print_table_data_xml(MYSQL_RES *result); static void print_tab_data(MYSQL_RES *result); static void print_table_data_vertically(MYSQL_RES *result); static void print_warnings(void); +static void print_last_query_cost(void); static void end_timer(ulonglong start_time, char *buff); static void nice_time(double sec,char *buff,bool part_second); extern "C" sig_handler mysql_end(int sig) __attribute__ ((noreturn)); @@ -1816,6 +1820,10 @@ static struct my_option my_long_options[] = {"show-warnings", OPT_SHOW_WARNINGS, "Show warnings after every statement.", &show_warnings, &show_warnings, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"show-query-costs", OPT_SHOW_WARNINGS, + "Show query cost after every statement.", + &show_query_cost, &show_query_cost, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, {"plugin_dir", OPT_PLUGIN_DIR, "Directory for client-side plugins.", &opt_plugin_dir, &opt_plugin_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, @@ -3574,6 +3582,8 @@ end: /* Show warnings if any or error occurred */ if (show_warnings == 1 && (warnings >= 1 || error)) print_warnings(); + if (show_query_cost) + print_last_query_cost(); if (!error && !status.batch && (mysql.server_status & SERVER_STATUS_DB_DROPPED)) @@ -4178,6 +4188,30 @@ end: } +/* print_last_query_cost */ + +static void print_last_query_cost() +{ + const char *query; + char *end; + MYSQL_RES *result; + MYSQL_ROW cur; + + query= "show status like 'last_query_cost'"; + mysql_real_query_for_lazy(query, strlen(query)); + mysql_store_result_for_lazy(&result); + if (!result) + goto end; + + cur= mysql_fetch_row(result); + if (strtod(cur[1], &end) != 0.0) + tee_fprintf(PAGER, "%s: %s\n\n", cur[0], cur[1]); + +end: + mysql_free_result(result); +} + + static const char *array_value(const char **array, char key) { for (; *array; array+= 2) @@ -4753,6 +4787,18 @@ com_nowarnings(String *buffer __attribute__((unused)), return 0; } +static int 
+com_query_cost(String *buffer __attribute__((unused)), + char *line __attribute__((unused))) +{ + show_query_cost= 1 - show_query_cost; + if (show_query_cost) + put_info("Last_query_cost enabled.",INFO_INFO); + else + put_info("Last_query_cost disabled.",INFO_INFO); + return 0; +} + /* Gets argument from a command on the command line. If mode is not GET_NEXT, skips the command and returns the first argument. The line is modified by @@ -5008,6 +5054,10 @@ com_status(String *buffer __attribute__((unused)), ulonglong id; MYSQL_RES *UNINIT_VAR(result); + /* + Don't remove "limit 1", + it is protection against SQL_SELECT_LIMIT=0 + */ if (mysql_real_query_for_lazy( C_STRING_WITH_LEN("select DATABASE(), USER() limit 1"))) return 0; @@ -5015,10 +5065,6 @@ com_status(String *buffer __attribute__((unused)), tee_puts("--------------", stdout); usage(1); /* Print version */ tee_fprintf(stdout, "\nConnection id:\t\t%lu\n",mysql_thread_id(&mysql)); - /* - Don't remove "limit 1", - it is protection against SQL_SELECT_LIMIT=0 - */ if (!mysql_store_result_for_lazy(&result)) { MYSQL_ROW cur=mysql_fetch_row(result); diff --git a/include/my_getopt.h b/include/my_getopt.h index ffff706e015..b57ac19f294 100644 --- a/include/my_getopt.h +++ b/include/my_getopt.h @@ -40,6 +40,7 @@ C_MODE_START #define GET_FLAGSET 15 #define GET_BIT 16 +#define GET_ADJUST_VALUE 256 #define GET_ASK_ADDR 128 #define GET_AUTO 64 #define GET_TYPE_MASK 63 @@ -100,6 +101,7 @@ typedef my_bool (*my_get_one_option)(const struct my_option *, const char *, con typedef void *(*my_getopt_value)(const char *, uint, const struct my_option *, int *); +typedef void (*my_getopt_adjust)(const struct my_option *, void *); extern char *disabled_my_option; extern char *autoset_my_option; @@ -109,6 +111,7 @@ extern my_bool my_getopt_prefix_matching; extern my_bool my_handle_options_init_variables; extern my_error_reporter my_getopt_error_reporter; extern my_getopt_value my_getopt_get_addr; +extern my_getopt_adjust 
my_getopt_adjust_value; extern int handle_options (int *argc, char ***argv, const struct my_option *longopts, my_get_one_option) diff --git a/include/my_global.h b/include/my_global.h index a849597f468..9b74824255f 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -679,6 +679,7 @@ typedef SOCKET_SIZE_TYPE size_socket; Io buffer size; Must be a power of 2 and a multiple of 512. May be smaller what the disk page size. This influences the speed of the isam btree library. eg to big to slow. + 4096 is a common block size on SSDs. */ #define IO_SIZE 4096U /* diff --git a/include/my_tracker.h b/include/my_tracker.h new file mode 100644 index 00000000000..88cefe5ef5d --- /dev/null +++ b/include/my_tracker.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* + Trivial framework to add a tracker to a C function +*/ + +#include "my_rdtsc.h" + +struct my_time_tracker +{ + ulonglong counter; + ulonglong cycles; +}; + +#ifdef HAVE_TIME_TRACKING +#define START_TRACKING ulonglong my_start_time= my_timer_cycles() +#define END_TRACKING(var) \ + { \ + ulonglong my_end_time= my_timer_cycles(); \ + (var)->counter++; \ + (var)->cycles+= (unlikely(my_end_time < my_start_time) ? 
\ + my_end_time - my_start_time + ULONGLONG_MAX : \ + my_end_time - my_start_time); \ + } +#else +#define START_TRACKING +#define END_TRACKING(var) do { } while(0) +#endif diff --git a/include/myisam.h b/include/myisam.h index 0942584e874..ad86903bc07 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -435,6 +435,8 @@ int thr_write_keys(MI_SORT_PARAM *sort_param); int sort_write_record(MI_SORT_PARAM *sort_param); int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulonglong); my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows); +struct OPTIMIZER_COSTS; +void myisam_update_optimizer_costs(struct OPTIMIZER_COSTS *costs); #ifdef __cplusplus } diff --git a/mysql-test/include/analyze-format.inc b/mysql-test/include/analyze-format.inc index 7d1c48f3e6f..f9579555d7b 100644 --- a/mysql-test/include/analyze-format.inc +++ b/mysql-test/include/analyze-format.inc @@ -1,3 +1,3 @@ # The time on ANALYSE FORMAT=JSON is rather variable ---replace_regex /("(r_total_time_ms|r_table_time_ms|r_other_time_ms|r_buffer_size|r_filling_time_ms|r_query_time_in_progress_ms)": )[^, \n]*/\1"REPLACED"/ +--replace_regex /("(r_total_time_ms|r_table_time_ms|r_other_time_ms|r_buffer_size|r_filling_time_ms|r_query_time_in_progress_ms|r_unpack_time_ms)": )[^, \n]*/\1"REPLACED"/ diff --git a/mysql-test/main/analyze_format_json.result b/mysql-test/main/analyze_format_json.result index 9a756782f96..7fd783e4355 100644 --- a/mysql-test/main/analyze_format_json.result +++ b/mysql-test/main/analyze_format_json.result @@ -183,7 +183,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "1Kb", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -233,7 +234,8 @@ ANALYZE "buffer_size": "1Kb", "join_type": "BNL", "attached_condition": "tbl1.c > tbl2.c", - "r_filtered": 15.83333333 + "r_filtered": 15.83333333, + "r_unpack_time_ms": "REPLACED" } } ] @@ -769,13 +771,14 @@ ANALYZE "r_other_time_ms": "REPLACED", "filtered": 100, 
"r_filtered": 0, - "attached_condition": "<in_optimizer>(t2.b,t2.b in (subquery#2))" + "attached_condition": "<in_optimizer>(t2.b,<exists>(subquery#2))" }, "buffer_type": "flat", "buffer_size": "65", "join_type": "BNL", - "attached_condition": "<in_optimizer>(t2.b,t2.b in (subquery#2))", - "r_filtered": null + "attached_condition": "<in_optimizer>(t2.b,<exists>(subquery#2))", + "r_filtered": null, + "r_unpack_time_ms": "REPLACED" } } ], @@ -783,20 +786,21 @@ ANALYZE { "query_block": { "select_id": 2, - "r_loops": 1, + "r_loops": 2, "r_total_time_ms": "REPLACED", "nested_loop": [ { "table": { "table_name": "t1", "access_type": "ALL", - "r_loops": 1, + "r_loops": 2, "rows": 2, "r_rows": 2, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", "filtered": 100, - "r_filtered": 100 + "r_filtered": 0, + "attached_condition": "4 = t1.a" } } ] @@ -878,7 +882,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "1", "join_type": "BNL", - "r_filtered": null + "r_filtered": null, + "r_unpack_time_ms": "REPLACED" } } ], @@ -916,7 +921,8 @@ ANALYZE "buffer_size": "65", "join_type": "BNL", "attached_condition": "t2.f2 = t3.f3", - "r_filtered": null + "r_filtered": null, + "r_unpack_time_ms": "REPLACED" } } ] diff --git a/mysql-test/main/analyze_stmt_orderby.result b/mysql-test/main/analyze_stmt_orderby.result index 76bc4d964b8..e188f93c160 100644 --- a/mysql-test/main/analyze_stmt_orderby.result +++ b/mysql-test/main/analyze_stmt_orderby.result @@ -494,7 +494,8 @@ ANALYZE "buffer_size": "65", "join_type": "BNL", "attached_condition": "t3.a = t0.a", - "r_filtered": 10 + "r_filtered": 10, + "r_unpack_time_ms": "REPLACED" } } ] @@ -578,7 +579,8 @@ ANALYZE "buffer_size": "119", "join_type": "BNL", "attached_condition": "t5.a = t6.a", - "r_filtered": 21.42857143 + "r_filtered": 21.42857143, + "r_unpack_time_ms": "REPLACED" } } ] diff --git a/mysql-test/main/ctype_collate.result b/mysql-test/main/ctype_collate.result index 1ae9f295042..29d27fd608b 100644 --- 
a/mysql-test/main/ctype_collate.result +++ b/mysql-test/main/ctype_collate.result @@ -748,7 +748,7 @@ hex(b) explain select hex(b) from t1 where b<'zzz' order by b; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ALL PRIMARY NULL NULL NULL 4 Using where; Using filesort +1 SIMPLE t1 range PRIMARY PRIMARY 34 NULL 4 Using where; Using filesort select hex(b) from t1 where b<'zzz' order by b; hex(b) 00 diff --git a/mysql-test/main/delete.result b/mysql-test/main/delete.result index ed3683d52f9..77a1f8c9813 100644 --- a/mysql-test/main/delete.result +++ b/mysql-test/main/delete.result @@ -128,7 +128,6 @@ a b delete ignore t11.*, t12.* from t11,t12 where t11.a = t12.a and t11.b <> (select b from t2 where t11.a < t2.a); Warnings: Warning 1242 Subquery returns more than 1 row -Warning 1242 Subquery returns more than 1 row select * from t11; a b 0 10 diff --git a/mysql-test/main/except.result b/mysql-test/main/except.result index d83623370d5..4d23ca772b3 100644 --- a/mysql-test/main/except.result +++ b/mysql-test/main/except.result @@ -387,7 +387,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -429,7 +430,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -505,7 +507,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -547,7 +550,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] diff --git a/mysql-test/main/except_all.result b/mysql-test/main/except_all.result index df19abda077..e62062d0427 100644 --- a/mysql-test/main/except_all.result +++ b/mysql-test/main/except_all.result @@ -514,7 +514,8 @@ ANALYZE 
"buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -556,7 +557,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -631,7 +633,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -673,7 +676,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "119", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] diff --git a/mysql-test/main/fetch_first.result b/mysql-test/main/fetch_first.result index e36f2db4100..c277362739a 100644 --- a/mysql-test/main/fetch_first.result +++ b/mysql-test/main/fetch_first.result @@ -843,6 +843,7 @@ fetch first 2 rows with ties; first_name last_name Alice Fowler Bob Trasc +Silvia Ganush # # Test CTE support. # @@ -858,7 +859,7 @@ select * from temp_table order by first_name, last_name; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY <derived2> ALL NULL NULL NULL NULL 2 Using filesort -2 DERIVED t1 range t1_name t1_name 103 NULL 3 Using where; Using index +2 DERIVED t1 range t1_name t1_name 206 NULL 3 Using where; Using index for group-by with temp_table as ( select first_name, last_name from t1 diff --git a/mysql-test/main/fulltext_order_by.test b/mysql-test/main/fulltext_order_by.test index 9fddf3b2fec..4c0333d0afb 100644 --- a/mysql-test/main/fulltext_order_by.test +++ b/mysql-test/main/fulltext_order_by.test @@ -36,6 +36,7 @@ SELECT IF(a=7,'match',IF(a=4,'match', 'no-match')), MATCH (message) AGAINST ('st # for fulltext searches too # alter table t1 add key m (message); +show create table t1; explain SELECT message FROM t1 WHERE MATCH (message) AGAINST ('steve') ORDER BY message; SELECT message FROM t1 WHERE MATCH (message) AGAINST ('steve') ORDER BY 
message desc; diff --git a/mysql-test/main/information_schema_all_engines.result b/mysql-test/main/information_schema_all_engines.result index 23a853e363c..db9bf156b8e 100644 --- a/mysql-test/main/information_schema_all_engines.result +++ b/mysql-test/main/information_schema_all_engines.result @@ -42,6 +42,7 @@ INNODB_TRX KEYWORDS KEY_CACHES KEY_COLUMN_USAGE +OPTIMIZER_COSTS OPTIMIZER_TRACE PARAMETERS PARTITIONS @@ -123,6 +124,7 @@ INNODB_TRX trx_id KEYWORDS WORD KEY_CACHES KEY_CACHE_NAME KEY_COLUMN_USAGE CONSTRAINT_SCHEMA +OPTIMIZER_COSTS ENGINE OPTIMIZER_TRACE QUERY PARAMETERS SPECIFIC_SCHEMA PARTITIONS TABLE_SCHEMA @@ -204,6 +206,7 @@ INNODB_TRX trx_id KEYWORDS WORD KEY_CACHES KEY_CACHE_NAME KEY_COLUMN_USAGE CONSTRAINT_SCHEMA +OPTIMIZER_COSTS ENGINE OPTIMIZER_TRACE QUERY PARAMETERS SPECIFIC_SCHEMA PARTITIONS TABLE_SCHEMA @@ -289,6 +292,7 @@ INNODB_TABLESPACES_ENCRYPTION information_schema.INNODB_TABLESPACES_ENCRYPTION 1 INNODB_TRX information_schema.INNODB_TRX 1 KEY_CACHES information_schema.KEY_CACHES 1 KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 +OPTIMIZER_COSTS information_schema.OPTIMIZER_COSTS 1 OPTIMIZER_TRACE information_schema.OPTIMIZER_TRACE 1 PARAMETERS information_schema.PARAMETERS 1 PARTITIONS information_schema.PARTITIONS 1 @@ -359,6 +363,7 @@ Database: information_schema | KEYWORDS | | KEY_CACHES | | KEY_COLUMN_USAGE | +| OPTIMIZER_COSTS | | OPTIMIZER_TRACE | | PARAMETERS | | PARTITIONS | @@ -430,6 +435,7 @@ Database: INFORMATION_SCHEMA | KEYWORDS | | KEY_CACHES | | KEY_COLUMN_USAGE | +| OPTIMIZER_COSTS | | OPTIMIZER_TRACE | | PARAMETERS | | PARTITIONS | @@ -463,5 +469,5 @@ Wildcard: inf_rmation_schema | information_schema | SELECT table_schema, count(*) FROM information_schema.TABLES WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test', 'mysqltest') GROUP BY TABLE_SCHEMA; table_schema count(*) -information_schema 66 +information_schema 67 mysql 31 diff --git a/mysql-test/main/intersect.result b/mysql-test/main/intersect.result 
index 425f6940a35..299737e794b 100644 --- a/mysql-test/main/intersect.result +++ b/mysql-test/main/intersect.result @@ -462,7 +462,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "256Kb", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -560,7 +561,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "256Kb", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] diff --git a/mysql-test/main/intersect_all.result b/mysql-test/main/intersect_all.result index aecd5b1ac18..e47a8872211 100644 --- a/mysql-test/main/intersect_all.result +++ b/mysql-test/main/intersect_all.result @@ -493,7 +493,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "65", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] @@ -591,7 +592,8 @@ ANALYZE "buffer_type": "flat", "buffer_size": "65", "join_type": "BNL", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } } ] diff --git a/mysql-test/main/key.result b/mysql-test/main/key.result index 762218f8580..42d57a35531 100644 --- a/mysql-test/main/key.result +++ b/mysql-test/main/key.result @@ -631,19 +631,19 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using temporary; Using filesort SHOW STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 8.506592 +Last_query_cost 0.014749 EXPLAIN SELECT a, SUM( b ) FROM t1 USE INDEX( a ) GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 6 Using temporary; Using filesort SHOW STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 8.506592 +Last_query_cost 0.014749 EXPLAIN SELECT a, SUM( b ) FROM t1 FORCE INDEX( a ) GROUP BY a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL a 5 NULL 6 SHOW STATUS LIKE 'Last_query_cost'; Variable_name Value 
-Last_query_cost 8.506592 +Last_query_cost 0.014749 DROP TABLE t1; # # MDEV-21480: Unique key using ref access though eq_ref access can be used diff --git a/mysql-test/main/myisam.result b/mysql-test/main/myisam.result index cb163bb29ff..aae3ea173b7 100644 --- a/mysql-test/main/myisam.result +++ b/mysql-test/main/myisam.result @@ -348,11 +348,11 @@ t1 1 c_2 2 a A 5 NULL NULL BTREE NO explain select * from t1,t2 where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL a NULL NULL NULL 2 -1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t1 ref a a 4 test.t2.a 3 explain select * from t1,t2 force index(a) where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL a NULL NULL NULL 2 -1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t1 ref a a 4 test.t2.a 3 explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL a NULL NULL NULL 2 @@ -388,10 +388,10 @@ t1 1 c_2 2 a A 5 NULL NULL BTREE NO explain select * from t1,t2 force index(c) where t1.a=t2.a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL NULL NULL NULL NULL 2 -1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t1 ref a a 4 test.t2.a 3 explain select * from t1 where a=0 or a=2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ALL a NULL NULL NULL 5 Using where +1 SIMPLE t1 range a a 4 NULL 5 Using index condition explain select * from t1 force index (a) where a=0 or a=2; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 4 NULL 5 Using index condition @@ -640,7 +640,7 @@ create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); insert into t1 values (null,''), (null,''); explain select count(*) from 
t1 where a is null; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ALL idx NULL NULL NULL 2 Using where +1 SIMPLE t1 ref idx idx 4 const 2 Using where select count(*) from t1 where a is null; count(*) 2 diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 55a316aca55..cc62e406e26 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -720,11 +720,15 @@ The following specify which files/extra groups are read (specified before remain max_connections*5 or max_connections + table_cache*2 (whichever is larger) number of file descriptors (Automatically configured unless set explicitly) - --optimizer-cache-hit-ratio=# - Expected hit rate of the row and index cache in storage - engines. The value should be an integer between 0 and 99, - where 0 means cache is empty and 99 means that value is - almost always in the cache. + --optimizer-disk-read-cost=# + Cost of reading a block of IO_SIZE (4096) from a disk (in + usec). + --optimizer-disk-read-ratio=# + Chance that we have to do a disk read to find a row or + index entry from the engine cache + (cache_misses/total_cache_requests). 0.0 means that + everything is cached and 1.0 means that nothing is + expected to be in the engine cache. --optimizer-extra-pruning-depth=# If the optimizer needs to enumerate join prefix of this size or larger, then it will try agressively prune away @@ -737,6 +741,8 @@ The following specify which files/extra groups are read (specified before remain --optimizer-key-copy-cost=# Cost of finding the next key in the engine and copying it to the SQL layer. + --optimizer-key-lookup-cost=# + Cost for finding a key based on a key value --optimizer-key-next-find-cost=# Cost of finding the next key and rowid when using filters. 
@@ -753,6 +759,14 @@ The following specify which files/extra groups are read (specified before remain --optimizer-row-copy-cost=# Cost of copying a row from the engine or the join cache to the SQL layer. + --optimizer-row-lookup-cost=# + Cost of finding a row based on a rowid or a clustered + key. + --optimizer-row-next-find-cost=# + Cost of finding the next row when scanning the table. + --optimizer-scan-setup-cost=# + Extra cost added to TABLE and INDEX scans to get + optimizer to prefer index lookups. --optimizer-search-depth=# Maximum depth of search performed by the query optimizer. Values larger than the number of relations in a query @@ -807,6 +821,8 @@ The following specify which files/extra groups are read (specified before remain record samples --optimizer-where-cost=# Cost of checking the row against the WHERE clause. + Increasing this will have the optimizer to prefer plans + with less row combinations. --performance-schema Enable the performance schema. --performance-schema-accounts-size=# @@ -1719,22 +1735,27 @@ old-alter-table DEFAULT old-mode UTF8_IS_UTF8MB3 old-passwords FALSE old-style-user-limits FALSE -optimizer-cache-hit-ratio 50 +optimizer-disk-read-cost 0.01024 +optimizer-disk-read-ratio 0.02 optimizer-extra-pruning-depth 8 -optimizer-index-block-copy-cost 0.2 -optimizer-key-compare-cost 0.05 -optimizer-key-copy-cost 0.025 -optimizer-key-next-find-cost 0.0125 +optimizer-index-block-copy-cost 3.56e-05 +optimizer-key-compare-cost 1.1361e-05 +optimizer-key-copy-cost 1.5685e-05 +optimizer-key-lookup-cost 0.000435777 +optimizer-key-next-find-cost 8.2347e-05 optimizer-max-sel-arg-weight 32000 optimizer-prune-level 2 -optimizer-row-copy-cost 0.05 +optimizer-row-copy-cost 6.0866e-05 +optimizer-row-lookup-cost 0.000130839 +optimizer-row-next-find-cost 4.5916e-05 +optimizer-scan-setup-cost 0.01 optimizer-search-depth 62 optimizer-selectivity-sampling-limit 100 optimizer-switch 
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_merge_sort_intersection=off,engine_condition_pushdown=off,index_condition_pushdown=on,derived_merge=on,derived_with_keys=on,firstmatch=on,loosescan=on,materialization=on,in_to_exists=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on,subquery_cache=on,mrr=off,mrr_cost_based=off,mrr_sort_keys=off,outer_join_with_cache=on,semijoin_with_cache=on,join_cache_incremental=on,join_cache_hashed=on,join_cache_bka=on,optimize_join_buffer_size=on,table_elimination=on,extended_keys=on,exists_to_in=on,orderby_uses_equalities=on,condition_pushdown_for_derived=on,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on optimizer-trace optimizer-trace-max-mem-size 1048576 optimizer-use-condition-selectivity 4 -optimizer-where-cost 0.2 +optimizer-where-cost 3.2e-05 performance-schema FALSE performance-schema-accounts-size -1 performance-schema-consumer-events-stages-current FALSE diff --git a/mysql-test/main/opt_trace_security.result b/mysql-test/main/opt_trace_security.result index 5334ed8b2c4..f440aa381c3 100644 --- a/mysql-test/main/opt_trace_security.result +++ b/mysql-test/main/opt_trace_security.result @@ -80,8 +80,8 @@ select * from db1.t1 { "table": "t1", "table_scan": { "rows": 3, - "read_cost": 1.002563477, - "read_and_compare_cost": 1.752563477 + "read_cost": 0.010373215, + "read_and_compare_cost": 0.010469215 } } ] @@ -101,18 +101,18 @@ select * from db1.t1 { { "access_type": "scan", "rows": 3, - "rows_after_scan": 3, "rows_after_filter": 3, - "cost": 1.752563477, + "rows_out": 3, + "cost": 0.010469215, "index_only": false, "chosen": true } ], "chosen_access_method": { "type": "scan", - "records_read": 3, - "records_out": 3, - "cost": 1.752563477, + "rows_read": 3, + "rows_out": 3, + "cost": 0.010469215, "uses_join_buffering": false } } @@ -123,14 +123,14 @@ select * from db1.t1 { "plan_prefix": [], 
"table": "t1", "rows_for_plan": 3, - "cost_for_plan": 1.752563477 + "cost_for_plan": 0.010469215 } ] }, { "best_join_order": ["t1"], "rows": 3, - "cost": 1.752563477 + "cost": 0.010469215 }, { "attaching_conditions_to_tables": { @@ -219,8 +219,8 @@ select * from db1.v1 { "table": "t1", "table_scan": { "rows": 3, - "read_cost": 1.002563477, - "read_and_compare_cost": 1.752563477 + "read_cost": 0.010373215, + "read_and_compare_cost": 0.010469215 } } ] @@ -240,18 +240,18 @@ select * from db1.v1 { { "access_type": "scan", "rows": 3, - "rows_after_scan": 3, "rows_after_filter": 3, - "cost": 1.752563477, + "rows_out": 3, + "cost": 0.010469215, "index_only": false, "chosen": true } ], "chosen_access_method": { "type": "scan", - "records_read": 3, - "records_out": 3, - "cost": 1.752563477, + "rows_read": 3, + "rows_out": 3, + "cost": 0.010469215, "uses_join_buffering": false } } @@ -262,14 +262,14 @@ select * from db1.v1 { "plan_prefix": [], "table": "t1", "rows_for_plan": 3, - "cost_for_plan": 1.752563477 + "cost_for_plan": 0.010469215 } ] }, { "best_join_order": ["t1"], "rows": 3, - "cost": 1.752563477 + "cost": 0.010469215 }, { "attaching_conditions_to_tables": { diff --git a/mysql-test/main/opt_trace_ucs2.result b/mysql-test/main/opt_trace_ucs2.result index 1ced3cd6dc2..5ae8a5fd6b8 100644 --- a/mysql-test/main/opt_trace_ucs2.result +++ b/mysql-test/main/opt_trace_ucs2.result @@ -42,7 +42,7 @@ JSON_DETAILED(JSON_EXTRACT(trace, '$**.analyzing_range_alternatives')) "using_mrr": false, "index_only": false, "rows": 2, - "cost": 2.022733708, + "cost": 0.003717837, "chosen": true } ], diff --git a/mysql-test/main/rowid_filter_innodb.result b/mysql-test/main/rowid_filter_innodb.result index fb7f853dd64..c1f47d40ce1 100644 --- a/mysql-test/main/rowid_filter_innodb.result +++ b/mysql-test/main/rowid_filter_innodb.result @@ -244,7 +244,7 @@ EXPLAIN "key_length": "4", "used_key_parts": ["l_shipDATE"], "rows": 510, - "filtered": 100, + "filtered": 10.07493782, "index_condition": 
"lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'", "attached_condition": "lineitem.l_quantity > 45" } @@ -256,7 +256,7 @@ set statement optimizer_switch='rowid_filter=off' for ANALYZE SELECT l_orderkey, WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND l_quantity > 45; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE lineitem range i_l_shipdate,i_l_quantity i_l_shipdate 4 NULL 510 510.00 100.00 11.76 Using index condition; Using where +1 SIMPLE lineitem range i_l_shipdate,i_l_quantity i_l_shipdate 4 NULL 510 510.00 10.07 11.76 Using index condition; Using where set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT l_orderkey, l_linenumber, l_shipdate, l_quantity FROM lineitem WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND l_quantity > 45; @@ -283,7 +283,7 @@ ANALYZE "r_rows": 510, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 100, + "filtered": 10.07493782, "r_filtered": 11.76470588, "index_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'", "attached_condition": "lineitem.l_quantity > 45" @@ -361,8 +361,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using where; Using index -1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where +1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 Using where; Using index +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM 
orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND @@ -374,35 +374,35 @@ EXPLAIN "nested_loop": [ { "table": { - "table_name": "lineitem", + "table_name": "orders", "access_type": "range", - "possible_keys": [ - "PRIMARY", - "i_l_shipdate", - "i_l_orderkey", - "i_l_orderkey_quantity" - ], - "key": "i_l_shipdate", - "key_length": "4", - "used_key_parts": ["l_shipDATE"], - "rows": 98, + "possible_keys": ["PRIMARY", "i_o_totalprice"], + "key": "i_o_totalprice", + "key_length": "9", + "used_key_parts": ["o_totalprice"], + "rows": 71, "filtered": 100, - "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'", + "attached_condition": "orders.o_totalprice between 200000 and 230000", "using_index": true } }, { "table": { - "table_name": "orders", - "access_type": "eq_ref", - "possible_keys": ["PRIMARY", "i_o_totalprice"], + "table_name": "lineitem", + "access_type": "ref", + "possible_keys": [ + "PRIMARY", + "i_l_shipdate", + "i_l_orderkey", + "i_l_orderkey_quantity" + ], "key": "PRIMARY", "key_length": "4", - "used_key_parts": ["o_orderkey"], - "ref": ["dbt3_s001.lineitem.l_orderkey"], - "rows": 1, - "filtered": 4.733333111, - "attached_condition": "orders.o_totalprice between 200000 and 230000" + "used_key_parts": ["l_orderkey"], + "ref": ["dbt3_s001.orders.o_orderkey"], + "rows": 4, + "filtered": 1.633319736, + "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" } } ] @@ -413,8 +413,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using where; Using index -1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 
1.00 4.73 11.22 Using where +1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 1.63 2.31 Using where set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND @@ -431,45 +431,45 @@ ANALYZE "nested_loop": [ { "table": { - "table_name": "lineitem", + "table_name": "orders", "access_type": "range", - "possible_keys": [ - "PRIMARY", - "i_l_shipdate", - "i_l_orderkey", - "i_l_orderkey_quantity" - ], - "key": "i_l_shipdate", - "key_length": "4", - "used_key_parts": ["l_shipDATE"], + "possible_keys": ["PRIMARY", "i_o_totalprice"], + "key": "i_o_totalprice", + "key_length": "9", + "used_key_parts": ["o_totalprice"], "r_loops": 1, - "rows": 98, - "r_rows": 98, + "rows": 71, + "r_rows": 71, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", "filtered": 100, "r_filtered": 100, - "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'", + "attached_condition": "orders.o_totalprice between 200000 and 230000", "using_index": true } }, { "table": { - "table_name": "orders", - "access_type": "eq_ref", - "possible_keys": ["PRIMARY", "i_o_totalprice"], + "table_name": "lineitem", + "access_type": "ref", + "possible_keys": [ + "PRIMARY", + "i_l_shipdate", + "i_l_orderkey", + "i_l_orderkey_quantity" + ], "key": "PRIMARY", "key_length": "4", - "used_key_parts": ["o_orderkey"], - "ref": ["dbt3_s001.lineitem.l_orderkey"], - "r_loops": 98, - "rows": 1, - "r_rows": 1, + "used_key_parts": ["l_orderkey"], + "ref": ["dbt3_s001.orders.o_orderkey"], + "r_loops": 71, + "rows": 4, + "r_rows": 6.704225352, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 4.733333111, - 
"r_filtered": 11.2244898, - "attached_condition": "orders.o_totalprice between 200000 and 230000" + "filtered": 1.633319736, + "r_filtered": 2.31092437, + "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" } } ] @@ -496,8 +496,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 Using where; Using index -1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 Using where +1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 Using where; Using index +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where set statement optimizer_switch='rowid_filter=off' for EXPLAIN FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND @@ -509,35 +509,35 @@ EXPLAIN "nested_loop": [ { "table": { - "table_name": "lineitem", + "table_name": "orders", "access_type": "range", - "possible_keys": [ - "PRIMARY", - "i_l_shipdate", - "i_l_orderkey", - "i_l_orderkey_quantity" - ], - "key": "i_l_shipdate", - "key_length": "4", - "used_key_parts": ["l_shipDATE"], - "rows": 98, + "possible_keys": ["PRIMARY", "i_o_totalprice"], + "key": "i_o_totalprice", + "key_length": "9", + "used_key_parts": ["o_totalprice"], + "rows": 71, "filtered": 100, - "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'", + "attached_condition": "orders.o_totalprice between 200000 and 230000", "using_index": true } }, { "table": { - "table_name": "orders", - "access_type": "eq_ref", - "possible_keys": ["PRIMARY", "i_o_totalprice"], + "table_name": 
"lineitem", + "access_type": "ref", + "possible_keys": [ + "PRIMARY", + "i_l_shipdate", + "i_l_orderkey", + "i_l_orderkey_quantity" + ], "key": "PRIMARY", "key_length": "4", - "used_key_parts": ["o_orderkey"], - "ref": ["dbt3_s001.lineitem.l_orderkey"], - "rows": 1, - "filtered": 4.733333111, - "attached_condition": "orders.o_totalprice between 200000 and 230000" + "used_key_parts": ["l_orderkey"], + "ref": ["dbt3_s001.orders.o_orderkey"], + "rows": 4, + "filtered": 1.633319736, + "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" } } ] @@ -548,8 +548,8 @@ FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_shipdate 4 NULL 98 98.00 100.00 100.00 Using where; Using index -1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 4.73 11.22 Using where +1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 1.63 2.31 Using where set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-01-31' AND @@ -566,45 +566,45 @@ ANALYZE "nested_loop": [ { "table": { - "table_name": "lineitem", + "table_name": "orders", "access_type": "range", - "possible_keys": [ - "PRIMARY", - "i_l_shipdate", - "i_l_orderkey", - "i_l_orderkey_quantity" - ], - "key": "i_l_shipdate", - "key_length": "4", - "used_key_parts": ["l_shipDATE"], + "possible_keys": ["PRIMARY", "i_o_totalprice"], + "key": 
"i_o_totalprice", + "key_length": "9", + "used_key_parts": ["o_totalprice"], "r_loops": 1, - "rows": 98, - "r_rows": 98, + "rows": 71, + "r_rows": 71, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", "filtered": 100, "r_filtered": 100, - "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'", + "attached_condition": "orders.o_totalprice between 200000 and 230000", "using_index": true } }, { "table": { - "table_name": "orders", - "access_type": "eq_ref", - "possible_keys": ["PRIMARY", "i_o_totalprice"], + "table_name": "lineitem", + "access_type": "ref", + "possible_keys": [ + "PRIMARY", + "i_l_shipdate", + "i_l_orderkey", + "i_l_orderkey_quantity" + ], "key": "PRIMARY", "key_length": "4", - "used_key_parts": ["o_orderkey"], - "ref": ["dbt3_s001.lineitem.l_orderkey"], - "r_loops": 98, - "rows": 1, - "r_rows": 1, + "used_key_parts": ["l_orderkey"], + "ref": ["dbt3_s001.orders.o_orderkey"], + "r_loops": 71, + "rows": 4, + "r_rows": 6.704225352, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 4.733333111, - "r_filtered": 11.2244898, - "attached_condition": "orders.o_totalprice between 200000 and 230000" + "filtered": 1.633319736, + "r_filtered": 2.31092437, + "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-01-31'" } } ] @@ -633,7 +633,7 @@ l_quantity > 45 AND o_totalprice between 180000 and 230000; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 144 Using where; Using index -1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (8%) Using where; Using rowid filter +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT 
o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND @@ -669,20 +669,12 @@ EXPLAIN "i_l_orderkey_quantity", "i_l_quantity" ], - "key": "i_l_orderkey", + "key": "PRIMARY", "key_length": "4", "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], - "rowid_filter": { - "range": { - "key": "i_l_shipdate", - "used_key_parts": ["l_shipDATE"] - }, - "rows": 510, - "selectivity_pct": 8.492922565 - }, "rows": 4, - "filtered": 0.855656624, + "filtered": 0.856362581, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45" } } @@ -696,7 +688,7 @@ l_quantity > 45 AND o_totalprice between 180000 and 230000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 144 144.00 100.00 100.00 Using where; Using index -1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (8%) 0.54 (8%) 0.86 20.51 Using where; Using rowid filter +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity,i_l_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.62 0.86 1.68 Using where set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, l_quantity, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND @@ -742,29 +734,17 @@ ANALYZE "i_l_orderkey_quantity", "i_l_quantity" ], - "key": "i_l_orderkey", + "key": "PRIMARY", "key_length": "4", "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], - "rowid_filter": { - "range": { - "key": "i_l_shipdate", - "used_key_parts": ["l_shipDATE"] - }, - "rows": 510, - "selectivity_pct": 8.492922565, - 
"r_rows": 510, - "r_selectivity_pct": 8.176100629, - "r_buffer_size": "REPLACED", - "r_filling_time_ms": "REPLACED" - }, "r_loops": 144, "rows": 4, - "r_rows": 0.541666667, + "r_rows": 6.625, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 0.855656624, - "r_filtered": 20.51282051, + "filtered": 0.856362581, + "r_filtered": 1.677148847, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45" } } @@ -841,7 +821,7 @@ EXPLAIN "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], "rows": 4, - "filtered": 0.855656624, + "filtered": 0.856362581, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45" } } @@ -910,7 +890,7 @@ ANALYZE "r_rows": 6.625, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 0.855656624, + "filtered": 0.856362581, "r_filtered": 1.677148847, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30' and lineitem.l_quantity > 45" } @@ -993,7 +973,7 @@ EXPLAIN "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], "rows": 4, - "filtered": 8.492922783, + "filtered": 8.499929428, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'" } } @@ -1006,7 +986,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index -1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.49 7.77 Using where +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.50 7.77 Using where set statement optimizer_switch='rowid_filter=on' for ANALYZE 
FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND @@ -1059,7 +1039,7 @@ ANALYZE "r_rows": 6.704225352, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 8.492922783, + "filtered": 8.499929428, "r_filtered": 7.773109244, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'" } @@ -1154,7 +1134,7 @@ EXPLAIN "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], "rows": 4, - "filtered": 8.492922783, + "filtered": 8.499929428, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'" } } @@ -1167,7 +1147,7 @@ WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND o_totalprice between 200000 and 230000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE orders range PRIMARY,i_o_totalprice i_o_totalprice 9 NULL 71 71.00 100.00 100.00 Using where; Using index -1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.49 7.77 Using where +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.70 8.50 7.77 Using where set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT o_orderkey, l_linenumber, l_shipdate, o_totalprice FROM orders JOIN lineitem ON o_orderkey=l_orderkey WHERE l_shipdate BETWEEN '1997-01-01' AND '1997-06-30' AND @@ -1220,7 +1200,7 @@ ANALYZE "r_rows": 6.704225352, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 8.492922783, + "filtered": 8.499929428, "r_filtered": 7.773109244, "attached_condition": "lineitem.l_shipDATE between '1997-01-01' and '1997-06-30'" } @@ -1312,7 +1292,7 @@ EXPLAIN "key_length": "4", "used_key_parts": ["l_receiptDATE"], "rows": 18, - "filtered": 100, + "filtered": 
0.566194832, "index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'", "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'" } @@ -1341,7 +1321,7 @@ l_shipdate BETWEEN '1996-10-01' AND '1996-10-10' AND l_receiptdate BETWEEN '1996-10-05' AND '1996-10-10' AND o_totalprice BETWEEN 200000 AND 250000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 100.00 38.89 Using index condition; Using where +1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 0.57 38.89 Using index condition; Using where 1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 5.67 14.29 Using where set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT l_shipdate, l_receiptdate, o_totalprice FROM orders, lineitem @@ -1378,7 +1358,7 @@ ANALYZE "r_rows": 18, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 100, + "filtered": 0.566194832, "r_filtered": 38.88888889, "index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'", "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'" @@ -1449,7 +1429,7 @@ EXPLAIN "key_length": "4", "used_key_parts": ["l_receiptDATE"], "rows": 18, - "filtered": 100, + "filtered": 0.566194832, "index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'", "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'" } @@ -1478,7 +1458,7 @@ l_shipdate BETWEEN '1996-10-01' AND '1996-10-10' AND l_receiptdate BETWEEN '1996-10-05' AND '1996-10-10' AND o_totalprice BETWEEN 200000 AND 250000; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE lineitem range 
PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 100.00 38.89 Using index condition; Using where +1 SIMPLE lineitem range PRIMARY,i_l_shipdate,i_l_receiptdate,i_l_orderkey,i_l_orderkey_quantity i_l_receiptdate 4 NULL 18 18.00 0.57 38.89 Using index condition; Using where 1 SIMPLE orders eq_ref PRIMARY,i_o_totalprice PRIMARY 4 dbt3_s001.lineitem.l_orderkey 1 1.00 5.67 14.29 Using where set statement optimizer_switch='rowid_filter=off' for ANALYZE FORMAT=JSON SELECT l_shipdate, l_receiptdate, o_totalprice FROM orders, lineitem @@ -1515,7 +1495,7 @@ ANALYZE "r_rows": 18, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 100, + "filtered": 0.566194832, "r_filtered": 38.88888889, "index_condition": "lineitem.l_receiptDATE between '1996-10-05' and '1996-10-10'", "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-10-10'" @@ -1566,7 +1546,7 @@ o_totalprice BETWEEN 200000 AND 220000 AND l_shipdate BETWEEN '1996-10-01' AND '1996-12-01'; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 Using index condition; Using where -1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) Using where; Using rowid filter +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate FROM orders, lineitem WHERE o_orderkey=l_orderkey AND @@ -1587,7 +1567,7 @@ EXPLAIN "key_length": "9", "used_key_parts": ["o_totaldiscount"], "rows": 41, - "filtered": 100, + "filtered": 3.333333254, "index_condition": "orders.o_totaldiscount between 18000 and 20000", "attached_condition": "orders.o_totalprice between 
200000 and 220000" } @@ -1602,20 +1582,12 @@ EXPLAIN "i_l_orderkey", "i_l_orderkey_quantity" ], - "key": "i_l_orderkey", + "key": "PRIMARY", "key_length": "4", "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], - "rowid_filter": { - "range": { - "key": "i_l_shipdate", - "used_key_parts": ["l_shipDATE"] - }, - "rows": 183, - "selectivity_pct": 3.04746045 - }, "rows": 4, - "filtered": 3.047460556, + "filtered": 3.04997468, "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'" } } @@ -1629,8 +1601,8 @@ o_totaldiscount BETWEEN 18000 AND 20000 AND o_totalprice BETWEEN 200000 AND 220000 AND l_shipdate BETWEEN '1996-10-01' AND '1996-12-01'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 100.00 2.44 Using index condition; Using where -1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) 4.00 (66%) 3.05 100.00 Using where; Using rowid filter +1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 3.33 2.44 Using index condition; Using where +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.00 3.05 66.67 Using where set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate FROM orders, lineitem WHERE o_orderkey=l_orderkey AND @@ -1660,7 +1632,7 @@ ANALYZE "r_rows": 41, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 100, + "filtered": 3.333333254, "r_filtered": 2.43902439, "index_condition": "orders.o_totaldiscount between 18000 and 20000", "attached_condition": "orders.o_totalprice between 200000 and 220000" @@ -1676,29 +1648,17 @@ ANALYZE "i_l_orderkey", "i_l_orderkey_quantity" ], - 
"key": "i_l_orderkey", + "key": "PRIMARY", "key_length": "4", "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], - "rowid_filter": { - "range": { - "key": "i_l_shipdate", - "used_key_parts": ["l_shipDATE"] - }, - "rows": 183, - "selectivity_pct": 3.04746045, - "r_rows": 183, - "r_selectivity_pct": 66.66666667, - "r_buffer_size": "REPLACED", - "r_filling_time_ms": "REPLACED" - }, "r_loops": 1, "rows": 4, - "r_rows": 4, + "r_rows": 6, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 3.047460556, - "r_filtered": 100, + "filtered": 3.04997468, + "r_filtered": 66.66666667, "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'" } } @@ -1745,7 +1705,7 @@ EXPLAIN "key_length": "9", "used_key_parts": ["o_totaldiscount"], "rows": 41, - "filtered": 100, + "filtered": 3.333333254, "index_condition": "orders.o_totaldiscount between 18000 and 20000", "attached_condition": "orders.o_totalprice between 200000 and 220000" } @@ -1765,7 +1725,7 @@ EXPLAIN "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], "rows": 4, - "filtered": 3.047460556, + "filtered": 3.04997468, "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'" } } @@ -1779,7 +1739,7 @@ o_totaldiscount BETWEEN 18000 AND 20000 AND o_totalprice BETWEEN 200000 AND 220000 AND l_shipdate BETWEEN '1996-10-01' AND '1996-12-01'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 100.00 2.44 Using index condition; Using where +1 SIMPLE orders range PRIMARY,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 3.33 2.44 Using index condition; Using where 1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.00 3.05 66.67 Using where set statement optimizer_switch='rowid_filter=off' for 
ANALYZE FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate FROM orders, lineitem @@ -1810,7 +1770,7 @@ ANALYZE "r_rows": 41, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 100, + "filtered": 3.333333254, "r_filtered": 2.43902439, "index_condition": "orders.o_totaldiscount between 18000 and 20000", "attached_condition": "orders.o_totalprice between 200000 and 220000" @@ -1835,7 +1795,7 @@ ANALYZE "r_rows": 6, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 3.047460556, + "filtered": 3.04997468, "r_filtered": 66.66666667, "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'" } @@ -1865,7 +1825,7 @@ o_totalprice BETWEEN 200000 AND 220000 AND l_shipdate BETWEEN '1996-10-01' AND '1996-12-01'; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 Using index condition; Using where -1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) Using where; Using rowid filter +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 Using where set statement optimizer_switch='rowid_filter=on' for EXPLAIN FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate FROM v1, lineitem WHERE o_orderkey=l_orderkey AND @@ -1906,18 +1866,10 @@ EXPLAIN "i_l_orderkey", "i_l_orderkey_quantity" ], - "key": "i_l_orderkey", + "key": "PRIMARY", "key_length": "4", "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], - "rowid_filter": { - "range": { - "key": "i_l_shipdate", - "used_key_parts": ["l_shipDATE"] - }, - "rows": 183, - "selectivity_pct": 3.04746045 - }, "rows": 4, "filtered": "REPLACED", "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'" @@ -1934,7 +1886,7 @@ o_totalprice 
BETWEEN 200000 AND 220000 AND l_shipdate BETWEEN '1996-10-01' AND '1996-12-01'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE orders range PRIMARY,i_o_orderdate,i_o_totalprice,i_o_totaldiscount i_o_totaldiscount 9 NULL 41 41.00 # 2.44 Using index condition; Using where -1 SIMPLE lineitem ref|filter PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey|i_l_shipdate 4|4 dbt3_s001.orders.o_orderkey 4 (3%) 4.00 (66%) # 100.00 Using where; Using rowid filter +1 SIMPLE lineitem ref PRIMARY,i_l_shipdate,i_l_orderkey,i_l_orderkey_quantity PRIMARY 4 dbt3_s001.orders.o_orderkey 4 6.00 # 66.67 Using where set statement optimizer_switch='rowid_filter=on' for ANALYZE FORMAT=JSON SELECT o_totaldiscount, o_totalprice, l_shipdate FROM v1, lineitem WHERE o_orderkey=l_orderkey AND @@ -1985,29 +1937,17 @@ ANALYZE "i_l_orderkey", "i_l_orderkey_quantity" ], - "key": "i_l_orderkey", + "key": "PRIMARY", "key_length": "4", "used_key_parts": ["l_orderkey"], "ref": ["dbt3_s001.orders.o_orderkey"], - "rowid_filter": { - "range": { - "key": "i_l_shipdate", - "used_key_parts": ["l_shipDATE"] - }, - "rows": 183, - "selectivity_pct": 3.04746045, - "r_rows": 183, - "r_selectivity_pct": 66.66666667, - "r_buffer_size": "REPLACED", - "r_filling_time_ms": "REPLACED" - }, "r_loops": 1, "rows": 4, - "r_rows": 4, + "r_rows": 6, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", "filtered": "REPLACED", - "r_filtered": 100, + "r_filtered": 66.66666667, "attached_condition": "lineitem.l_shipDATE between '1996-10-01' and '1996-12-01'" } } @@ -2246,7 +2186,7 @@ EXPLAIN EXTENDED SELECT * FROM t1 HAVING (7, 9) IN (SELECT t2.i1, t2.i2 FROM t2 WHERE t2.i1 = 3); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL NULL Impossible HAVING -2 SUBQUERY t2 ref i1,i2 i1 5 const 1 100.00 Using index condition; Using where +2 SUBQUERY t2 ref i1,i2 i1 5 const 1 10.00 Using 
index condition; Using where Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` having 0 DROP TABLE t1,t2; @@ -2284,7 +2224,7 @@ EXPLAIN EXTENDED SELECT * FROM t1 INNER JOIN t2 ON ( pk1 <> pk2 AND pk1 = a2 ) WHERE b1 <= ( SELECT MAX(b2) FROM t2 WHERE pk2 <= 1 ); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 101 100.00 Using where -1 PRIMARY t1 eq_ref|filter PRIMARY,b1 PRIMARY|b1 4|4 test.t2.a2 1 (87%) 87.00 Using where; Using rowid filter +1 PRIMARY t1 eq_ref PRIMARY,b1 PRIMARY 4 test.t2.a2 1 87.00 Using where 2 SUBQUERY t2 range PRIMARY PRIMARY 4 NULL 1 100.00 Using index condition Warnings: Note 1003 /* select#1 */ select `test`.`t1`.`pk1` AS `pk1`,`test`.`t1`.`a1` AS `a1`,`test`.`t1`.`b1` AS `b1`,`test`.`t2`.`pk2` AS `pk2`,`test`.`t2`.`a2` AS `a2`,`test`.`t2`.`b2` AS `b2` from `test`.`t1` join `test`.`t2` where `test`.`t1`.`pk1` = `test`.`t2`.`a2` and `test`.`t1`.`b1` <= (/* select#2 */ select max(`test`.`t2`.`b2`) from `test`.`t2` where `test`.`t2`.`pk2` <= 1) and `test`.`t2`.`a2` <> `test`.`t2`.`pk2` @@ -2313,14 +2253,6 @@ EXPLAIN "key_length": "4", "used_key_parts": ["pk1"], "ref": ["test.t2.a2"], - "rowid_filter": { - "range": { - "key": "b1", - "used_key_parts": ["b1"] - }, - "rows": 87, - "selectivity_pct": 87 - }, "rows": 1, "filtered": 87, "attached_condition": "t1.b1 <= (subquery#2)" @@ -2387,7 +2319,7 @@ explain select * from t1 where el_index like '10%' and (el_index_60 like '10%' or el_index_60 like '20%'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 range el_index,el_index_60 el_index 62 NULL 1000 Using where +1 SIMPLE t1 ALL el_index,el_index_60 NULL NULL NULL 10000 Using where drop table t10, t11, t1; # # MDEV-22160: SIGSEGV in st_join_table::save_explain_data on SELECT @@ -2442,8 +2374,8 @@ pk a b c 7 5 k 5 explain SELECT * FROM t1 JOIN t2 WHERE a = c AND pk BETWEEN 4 AND 7 AND a BETWEEN 2 AND 12 AND b != 'foo'; 
id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 6 Using where -1 SIMPLE t1 ref|filter PRIMARY,a,a_2 a|PRIMARY 5|4 test.t2.c 3 (4%) Using where; Using rowid filter +1 SIMPLE t1 range|filter PRIMARY,a,a_2 PRIMARY|a 4|5 NULL 4 (11%) Using index condition; Using where; Using rowid filter +1 SIMPLE t2 ALL NULL NULL NULL NULL 6 Using where; Using join buffer (flat, BNL join) SET optimizer_switch='rowid_filter=off'; SELECT * FROM t1 JOIN t2 WHERE a = c AND pk BETWEEN 4 AND 7 AND a BETWEEN 2 AND 12 AND b != 'foo'; pk a b c @@ -2530,7 +2462,7 @@ EXPLAIN ] }, "rows": 1, - "filtered": 100, + "filtered": 1.587301612, "attached_condition": "t1.f1 is null and t1.f2 is null and (t1.f2 between 'a' and 'z' or t1.f1 = 'a')" } } @@ -2565,7 +2497,7 @@ EXPLAIN ] }, "rows": 1, - "filtered": 100, + "filtered": 1.587301612, "attached_condition": "t1.f1 is null and t1.f2 is null and (t1.f2 between 'a' and 'z' or t1.f1 = 'a')" } } @@ -2592,7 +2524,7 @@ id y x 1 2 1 explain extended select * from t1 join t2 on t1.id = t2.x where t2.y = 2 and t1.id = 1; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 const PRIMARY PRIMARY 4 const 1 # Using index +1 SIMPLE t1 const PRIMARY PRIMARY 4 const 1 # 1 SIMPLE t2 index_merge x,y y,x 5,5 NULL 1 # Using intersect(y,x); Using where; Using index Warnings: Note 1003 select 1 AS `id`,`test`.`t2`.`y` AS `y`,`test`.`t2`.`x` AS `x` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`y` = 2 and `test`.`t2`.`x` = 1 @@ -2614,7 +2546,7 @@ count(*) 6 explain extended select count(*) from t1 where a in (22,83,11) and b=2; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 range b,a a 5 NULL 33 100.00 Using index condition; Using where +1 SIMPLE t1 range b,a a 5 NULL 33 5.90 Using index condition; Using where Warnings: Note 1003 select count(0) AS `count(*)` from `test`.`t1` where `test`.`t1`.`b` = 2 and `test`.`t1`.`a` in (22,83,11) 
select * from t1 where a in (22,83,11) and b=2; @@ -2717,11 +2649,11 @@ t1.id2 = t1.id); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t3 ALL NULL NULL NULL NULL 1 100.00 Using where 2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 10 100.00 Using where -3 MATERIALIZED t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index -3 MATERIALIZED bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join) +3 DEPENDENT SUBQUERY t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index +3 DEPENDENT SUBQUERY bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join) Warnings: Note 1276 Field or reference 'test.t3.id' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,`test`.`t3`.`id` in ( <materialize> (/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' ), <primary_index_lookup>(`test`.`t3`.`id` in <temporary table> on distinct_key where `test`.`t3`.`id` = `<subquery3>`.`id`)))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1))) +Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,<exists>(/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' and <cache>(`test`.`t3`.`id`) = `test`.`bt1`.`id`))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1))) SELECT 1 FROM t3 WHERE EXISTS ( SELECT 1 FROM t1 WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1 @@ 
-2740,11 +2672,11 @@ t1.id2 = t1.id); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t3 ALL NULL NULL NULL NULL 1 100.00 Using where 2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 10 100.00 Using where -3 MATERIALIZED t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index -3 MATERIALIZED bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join) +3 DEPENDENT SUBQUERY t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index +3 DEPENDENT SUBQUERY bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join) Warnings: Note 1276 Field or reference 'test.t3.id' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,`test`.`t3`.`id` in ( <materialize> (/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`ch` = `test`.`t2`.`ch2` and `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' ), <primary_index_lookup>(`test`.`t3`.`id` in <temporary table> on distinct_key where `test`.`t3`.`id` = `<subquery3>`.`id`)))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1))) +Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,<exists>(/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`ch` = `test`.`t2`.`ch2` and `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' and <cache>(`test`.`t3`.`id`) = `test`.`bt1`.`id`))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1))) SELECT 1 FROM t3 WHERE EXISTS ( SELECT 1 FROM t1 WHERE t3.id IN ( SELECT 
bt1.id FROM t2, t1 AS bt1 @@ -2781,7 +2713,7 @@ test.t1 analyze status OK explain extended select count(0) from t1 where id=15066 and (match s against ('+"fttest"' in boolean mode)); id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 fulltext id,s s 0 1 1.64 Using where +1 SIMPLE t1 ref id,s id 5 const 1 100.00 Using where Warnings: Note 1003 select count(0) AS `count(0)` from `test`.`t1` where `test`.`t1`.`id` = 15066 and (match `test`.`t1`.`s` against ('+"fttest"' in boolean mode)) select count(0) from t1 @@ -2899,7 +2831,7 @@ WHERE 1 = 1 AND domain = 'www.mailhost.i-dev.fr' AND timestamp >= DATE_ADD(CURRENT_TIMESTAMP, INTERVAL -1 MONTH) ORDER BY timestamp DESC; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 range ixEventWhoisDomainDomain,ixEventWhoisDomainTimestamp ixEventWhoisDomainTimestamp 4 NULL 1 100.00 Using where +1 SIMPLE t1 range ixEventWhoisDomainDomain,ixEventWhoisDomainTimestamp ixEventWhoisDomainTimestamp 4 NULL 1 28.57 Using where Warnings: Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`domain` AS `domain`,`test`.`t1`.`registrant_name` AS `registrant_name`,`test`.`t1`.`registrant_organization` AS `registrant_organization`,`test`.`t1`.`registrant_street1` AS `registrant_street1`,`test`.`t1`.`registrant_street2` AS `registrant_street2`,`test`.`t1`.`registrant_street3` AS `registrant_street3`,`test`.`t1`.`registrant_street4` AS `registrant_street4`,`test`.`t1`.`registrant_street5` AS `registrant_street5`,`test`.`t1`.`registrant_city` AS `registrant_city`,`test`.`t1`.`registrant_postal_code` AS `registrant_postal_code`,`test`.`t1`.`registrant_country` AS `registrant_country`,`test`.`t1`.`registrant_email` AS `registrant_email`,`test`.`t1`.`registrant_telephone` AS `registrant_telephone`,`test`.`t1`.`administrative_name` AS `administrative_name`,`test`.`t1`.`administrative_organization` AS `administrative_organization`,`test`.`t1`.`administrative_street1` AS 
`administrative_street1`,`test`.`t1`.`administrative_street2` AS `administrative_street2`,`test`.`t1`.`administrative_street3` AS `administrative_street3`,`test`.`t1`.`administrative_street4` AS `administrative_street4`,`test`.`t1`.`administrative_street5` AS `administrative_street5`,`test`.`t1`.`administrative_city` AS `administrative_city`,`test`.`t1`.`administrative_postal_code` AS `administrative_postal_code`,`test`.`t1`.`administrative_country` AS `administrative_country`,`test`.`t1`.`administrative_email` AS `administrative_email`,`test`.`t1`.`administrative_telephone` AS `administrative_telephone`,`test`.`t1`.`technical_name` AS `technical_name`,`test`.`t1`.`technical_organization` AS `technical_organization`,`test`.`t1`.`technical_street1` AS `technical_street1`,`test`.`t1`.`technical_street2` AS `technical_street2`,`test`.`t1`.`technical_street3` AS `technical_street3`,`test`.`t1`.`technical_street4` AS `technical_street4`,`test`.`t1`.`technical_street5` AS `technical_street5`,`test`.`t1`.`technical_city` AS `technical_city`,`test`.`t1`.`technical_postal_code` AS `technical_postal_code`,`test`.`t1`.`technical_country` AS `technical_country`,`test`.`t1`.`technical_email` AS `technical_email`,`test`.`t1`.`technical_telephone` AS `technical_telephone`,`test`.`t1`.`json` AS `json`,`test`.`t1`.`timestamp` AS `timestamp` from `test`.`t1` where `test`.`t1`.`domain` = 'www.mailhost.i-dev.fr' and `test`.`t1`.`timestamp` >= <cache>(current_timestamp() + interval -1 month) order by `test`.`t1`.`timestamp` desc SET optimizer_switch=@save_optimizer_switch; @@ -2946,7 +2878,7 @@ SELECT * FROM t1 WHERE (a BETWEEN 9 AND 10 OR a IS NULL) AND (b BETWEEN 9 AND 10 OR b = 9) ORDER BY pk LIMIT 1; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 index a,b PRIMARY 4 NULL 73 100.00 Using where +1 SIMPLE t1 index a,b PRIMARY 4 NULL 73 56.05 Using where Warnings: Note 1003 select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` 
AS `b` from `test`.`t1` where (`test`.`t1`.`a` between 9 and 10 or `test`.`t1`.`a` is null) and (`test`.`t1`.`b` between 9 and 10 or `test`.`t1`.`b` = 9) order by `test`.`t1`.`pk` limit 1 ANALYZE @@ -2954,7 +2886,7 @@ SELECT * FROM t1 WHERE (a BETWEEN 9 AND 10 OR a IS NULL) AND (b BETWEEN 9 AND 10 OR b = 9) ORDER BY pk LIMIT 1; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1 index a,b PRIMARY 4 NULL 3008 3008.00 6.38 0.00 Using where +1 SIMPLE t1 index a,b PRIMARY 4 NULL 3008 3008.00 1.36 0.00 Using where DROP TABLE t1; SET global innodb_stats_persistent= @stats.save; # @@ -3087,7 +3019,7 @@ fi.fh in (6311439873746261694,-397087483897438286, id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 100.00 Using intersect(acli_tp,acli_rid); Using where; Using index 1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where -1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where +1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where Warnings: Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and `test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774) set statement optimizer_switch='rowid_filter=off' for select t.id, fi.* @@ -3114,7 +3046,7 @@ fi.fh in (6311439873746261694,-397087483897438286, id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 
100.00 Using intersect(acli_tp,acli_rid); Using where; Using index 1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where -1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where +1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where Warnings: Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and `test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774) set statement optimizer_switch='rowid_filter=on' for select t.id, fi.* @@ -3143,7 +3075,7 @@ fi.fh in (6311439873746261694,-397087483897438286, id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 100.00 Using intersect(acli_tp,acli_rid); Using where; Using index 1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where; Using join buffer (flat, BKA join); Rowid-ordered scan -1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan +1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan Warnings: Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and 
`test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774) set statement optimizer_switch='rowid_filter=off' for select t.id, fi.* @@ -3170,7 +3102,7 @@ fi.fh in (6311439873746261694,-397087483897438286, id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t index_merge PRIMARY,acli_rid,acli_tp acli_tp,acli_rid 2,767 NULL 2 100.00 Using intersect(acli_tp,acli_rid); Using where; Using index 1 SIMPLE a ref PRIMARY,acei_aclid acei_aclid 8 test.t.id 1 100.00 Using where; Using join buffer (flat, BKA join); Rowid-ordered scan -1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 17.14 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan +1 SIMPLE fi ref filt_aceid,filt_fh filt_aceid 8 test.a.id 1 26.09 Using where; Using join buffer (incremental, BKA join); Rowid-ordered scan Warnings: Note 1003 select `test`.`t`.`id` AS `id`,`test`.`fi`.`id` AS `id`,`test`.`fi`.`aceid` AS `aceid`,`test`.`fi`.`clid` AS `clid`,`test`.`fi`.`fh` AS `fh` from `test`.`acli` `t` join `test`.`acei` `a` join `test`.`filt` `fi` where `test`.`t`.`tp` = 121 and `test`.`a`.`atp` = 1 and `test`.`fi`.`aceid` = `test`.`a`.`id` and `test`.`a`.`aclid` = `test`.`t`.`id` and `test`.`t`.`rid` = 'B5FCC8C7111E4E3CBC21AAF5012F59C2' and `test`.`fi`.`fh` in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774) set statement optimizer_switch='rowid_filter=on' for select t.id, fi.* @@ -3260,7 +3192,8 @@ ANALYZE "join_type": "BKA", "mrr_type": "Rowid-ordered scan", "attached_condition": "a.atp = 1", - "r_filtered": 100 + "r_filtered": 100, + "r_unpack_time_ms": "REPLACED" } }, { @@ -3278,7 +3211,7 @@ ANALYZE "r_rows": 5, "r_table_time_ms": "REPLACED", "r_other_time_ms": "REPLACED", - "filtered": 17.1428566, + "filtered": 26.08628654, "r_filtered": 100 }, "buffer_type": "incremental", @@ -3286,7 +3219,8 @@ ANALYZE "join_type": 
"BKA", "mrr_type": "Rowid-ordered scan", "attached_condition": "fi.fh in (6311439873746261694,-397087483897438286,8518228073041491534,-5420422472375069774)", - "r_filtered": 40 + "r_filtered": 40, + "r_unpack_time_ms": "REPLACED" } } ] @@ -3333,7 +3267,7 @@ WHERE t1.c1 NOT IN (SELECT t2.c1 FROM t2, t1 AS a1 WHERE t2.i1 = t1.pk AND t2.i1 IS NOT NULL); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 15 100.00 Using where -2 DEPENDENT SUBQUERY t2 ref|filter c1,i1 c1|i1 3|5 func 6 (33%) 33.33 Using where; Full scan on NULL key; Using rowid filter +2 DEPENDENT SUBQUERY t2 ref|filter c1,i1 c1|i1 3|5 func 6 (33%) 11.11 Using where; Full scan on NULL key; Using rowid filter 2 DEPENDENT SUBQUERY a1 ALL NULL NULL NULL NULL 15 100.00 Using join buffer (flat, BNL join) Warnings: Note 1276 Field or reference 'test.t1.pk' of SELECT #2 was resolved in SELECT #1 diff --git a/mysql-test/main/show_explain_json.result b/mysql-test/main/show_explain_json.result index daf8a3f5ef4..a5c441af5b8 100644 --- a/mysql-test/main/show_explain_json.result +++ b/mysql-test/main/show_explain_json.result @@ -51,13 +51,13 @@ SHOW EXPLAIN { "table": { "table_name": "t1", - "access_type": "index", + "access_type": "range", "possible_keys": ["a"], "key": "a", "key_length": "5", "used_key_parts": ["a"], - "rows": 1000, - "filtered": 99.90000153, + "rows": 999, + "filtered": 100, "attached_condition": "t1.a < 100000", "using_index": true } diff --git a/mysql-test/main/sp.result b/mysql-test/main/sp.result index 37fdf62a6a9..0911a5276dd 100644 --- a/mysql-test/main/sp.result +++ b/mysql-test/main/sp.result @@ -2173,8 +2173,8 @@ create procedure bug3734 (param1 varchar(100)) select * from t3 where match (title,body) against (param1)| call bug3734('database')| id title body -5 MySQL vs. YourSQL In the following database comparison ... 1 MySQL Tutorial DBMS stands for DataBase ... +5 MySQL vs. YourSQL In the following database comparison ... 
call bug3734('Security')| id title body 6 MySQL Security When configured properly, MySQL ... diff --git a/mysql-test/main/status.result b/mysql-test/main/status.result index 78a39ee2ecf..ae64f370e45 100644 --- a/mysql-test/main/status.result +++ b/mysql-test/main/status.result @@ -71,10 +71,10 @@ a 6 show status like 'last_query_cost'; Variable_name Value -Last_query_cost 13.542725 +Last_query_cost 0.017820 show status like 'last_query_cost'; Variable_name Value -Last_query_cost 13.542725 +Last_query_cost 0.017820 select 1; 1 1 @@ -134,20 +134,20 @@ a 1 SHOW SESSION STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 1.501709 +Last_query_cost 0.010313 EXPLAIN SELECT a FROM t1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 2 SHOW SESSION STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 1.501709 +Last_query_cost 0.010313 SELECT a FROM t1 UNION SELECT a FROM t1 ORDER BY a; a 1 2 SHOW SESSION STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 0.000000 +Last_query_cost 0.010313 EXPLAIN SELECT a FROM t1 UNION SELECT a FROM t1 ORDER BY a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 2 @@ -155,25 +155,25 @@ id select_type table type possible_keys key key_len ref rows Extra NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL Using filesort SHOW SESSION STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 0.000000 +Last_query_cost 0.010313 SELECT a IN (SELECT a FROM t1) FROM t1 LIMIT 1; a IN (SELECT a FROM t1) 1 SHOW SESSION STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 0.000000 +Last_query_cost 0.010313 SELECT (SELECT a FROM t1 LIMIT 1) x FROM t1 LIMIT 1; x 1 SHOW SESSION STATUS LIKE 'Last_query_cost'; Variable_name Value -Last_query_cost 0.000000 +Last_query_cost 0.010313 SELECT * FROM t1 a, t1 b LIMIT 1; a a 1 1 SHOW SESSION STATUS LIKE 'Last_query_cost'; 
Variable_name Value -Last_query_cost 3.953418 +Last_query_cost 0.021119 DROP TABLE t1; connect con1,localhost,root,,; show status like 'com_show_status'; diff --git a/mysql-test/main/table_elim.result b/mysql-test/main/table_elim.result index 8ae5522e8bc..a99afdef3ec 100644 --- a/mysql-test/main/table_elim.result +++ b/mysql-test/main/table_elim.result @@ -337,7 +337,7 @@ id select_type table type possible_keys key key_len ref rows Extra explain select t1.a from t1 left join t2 on t2.pk between 0.5 and 1.5; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 4 -1 SIMPLE t2 index PRIMARY PRIMARY 4 NULL 2 Using where; Using index +1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 2 Using where; Using index explain select t1.a from t1 left join t2 on t2.pk between 10 and 10; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 4 @@ -408,7 +408,7 @@ select t1.* from t1 left join t2 on t2.pk=3 or t2.pk= 4; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 4 -1 SIMPLE t2 index PRIMARY PRIMARY 4 NULL 2 Using where; Using index +1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 2 Using where; Using index explain select t1.* from t1 left join t2 on t2.pk=3 or t2.pk= 3; @@ -419,7 +419,7 @@ select t1.* from t1 left join t2 on (t2.pk=3 and t2.b=3) or (t2.pk= 4 and t2.b=3); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 4 -1 SIMPLE t2 ALL PRIMARY NULL NULL NULL 2 Using where +1 SIMPLE t2 range PRIMARY PRIMARY 4 NULL 2 Using where drop table t1, t2; # # LPBUG#523593: Running RQG optimizer_no_subquery crashes MariaDB @@ -562,7 +562,10 @@ LEFT JOIN t1 ON t4.f1 = t1.f1 JOIN t5 ON t4.f3 ON t3.f1 = t5.f5 ON t2.f4 = t3.f4 WHERE t3.f2 ; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables 
+1 SIMPLE t3 ALL NULL NULL NULL NULL 2 Using where +1 SIMPLE t5 ref f5 f5 5 test.t3.f1 2 Using where; Using index +1 SIMPLE t4 ALL NULL NULL NULL NULL 3 Using where +1 SIMPLE t2 ref f4 f4 1003 test.t3.f4 2 Using where # ^^ The above must not produce a QEP of t3,t5,t2,t4 # as that violates the "no interleaving of outer join nests" rule. DROP TABLE t1,t2,t3,t4,t5; diff --git a/mysql-test/main/type_ranges.result b/mysql-test/main/type_ranges.result index 02b6c79bdf5..012d1fc67ce 100644 --- a/mysql-test/main/type_ranges.result +++ b/mysql-test/main/type_ranges.result @@ -173,12 +173,12 @@ PRIMARY KEY (auto) ); INSERT IGNORE INTO t2 (string,mediumblob_col,new_field) SELECT string,mediumblob_col,new_field from t1 where auto > 10; Warnings: +Warning 1265 Data truncated for column 'new_field' at row 1 Warning 1265 Data truncated for column 'new_field' at row 2 Warning 1265 Data truncated for column 'new_field' at row 3 Warning 1265 Data truncated for column 'new_field' at row 4 Warning 1265 Data truncated for column 'new_field' at row 5 Warning 1265 Data truncated for column 'new_field' at row 6 -Warning 1265 Data truncated for column 'new_field' at row 7 select * from t2; auto string mediumblob_col new_field 1 2 2 ne diff --git a/mysql-test/main/type_time_6065.result b/mysql-test/main/type_time_6065.result index 75b272430c1..a9f64fd9870 100644 --- a/mysql-test/main/type_time_6065.result +++ b/mysql-test/main/type_time_6065.result @@ -2267,7 +2267,7 @@ outr.col_varchar_key IS NULL ); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY outr system col_datetime_key NULL NULL NULL 1 100.00 -1 PRIMARY innr ref col_int_key col_int_key 4 const 2 100.00 Using where; FirstMatch(outr) +1 PRIMARY innr ref col_int_key col_int_key 4 const 2 50.00 Using where; FirstMatch(outr) 1 PRIMARY outr2 index col_time_key col_time_key 4 NULL 20 100.00 Using where; Using index; Using join buffer (flat, BNL join) Warnings: Note 1003 select 1 AS `col_int_nokey` from 
`test`.`t3` `outr2` semi join (`test`.`t1` `innr`) where `test`.`innr`.`col_int_key` = 1 and `test`.`innr`.`pk` >= `test`.`innr`.`col_int_nokey` and `test`.`outr2`.`col_time_key` > '2001-11-04 19:07:55' diff --git a/mysql-test/main/user_var.result b/mysql-test/main/user_var.result index 122a4233e24..924c252b951 100644 --- a/mysql-test/main/user_var.result +++ b/mysql-test/main/user_var.result @@ -22,7 +22,7 @@ i @vv1:=if(sv1.i,1,0) @vv2:=if(sv2.i,1,0) @vv3:=if(sv3.i,1,0) @vv1+@vv2+@vv3 2 1 0 0 1 explain select * from t1 where i=@vv1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ALL i NULL NULL NULL 3 Using where +1 SIMPLE t1 ref i i 4 const 2 select @vv1,i,v from t1 where i=@vv1; @vv1 i v 1 1 1 @@ -35,7 +35,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL i 4 NULL 3 Using where; Using index explain select * from t1 where i=@vv1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 ALL i NULL NULL NULL 3 Using where +1 SIMPLE t1 ref i i 4 const 2 drop table t1,t2; set @a=0,@b=0; select @a:=10, @b:=1, @a > @b, @a < @b; diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 9f13ca7a424..0f78ea90bdd 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -86,6 +86,7 @@ my_bool my_getopt_prefix_matching= 1; my_bool my_handle_options_init_variables = 1; my_getopt_value my_getopt_get_addr= 0; +my_getopt_adjust my_getopt_adjust_value= 0; static void default_reporter(enum loglevel level, const char *format, ...) 
{ @@ -897,7 +898,12 @@ static int setval(const struct my_option *opts, void *value, char *argument, goto ret; }; } + + if (opts->var_type & GET_ADJUST_VALUE) + (*my_getopt_adjust_value)(opts, value); + validate_value(opts->name, argument, option_file); + DBUG_RETURN(0); ret: diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 530a33193d3..24b68fedd94 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -174,7 +174,7 @@ SET (SQL_SOURCE sql_tvc.cc sql_tvc.h opt_split.cc rowid_filter.cc rowid_filter.h - optimizer_costs.h + optimizer_costs.h optimizer_defaults.h opt_trace.cc table_cache.cc encryption.cc temporary_tables.cc json_table.cc diff --git a/sql/filesort.cc b/sql/filesort.cc index 071108f7e91..96eabfdab89 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -1132,8 +1132,6 @@ write_keys(Sort_param *param, SORT_INFO *fs_info, uint count, for (uint ix= 0; ix < count; ++ix) { uchar *record= fs_info->get_sorted_record(ix); - - if (my_b_write(tempfile, record, param->get_record_length(record))) DBUG_RETURN(1); /* purecov: inspected */ } @@ -1678,7 +1676,7 @@ ulong read_to_buffer(IO_CACHE *fromfile, Merge_chunk *buffpek, num_bytes_read= bytes_to_read; buffpek->init_current_key(); - buffpek->advance_file_position(num_bytes_read); /* New filepos */ + buffpek->advance_file_position(num_bytes_read); /* New filepos */ buffpek->decrement_rowcount(count); buffpek->set_mem_count(count); return (ulong) num_bytes_read; diff --git a/sql/filesort_utils.cc b/sql/filesort_utils.cc index 854033cc8d8..1aa17deb16e 100644 --- a/sql/filesort_utils.cc +++ b/sql/filesort_utils.cc @@ -19,7 +19,7 @@ #include "sql_const.h" #include "sql_sort.h" #include "table.h" - +#include "optimizer_defaults.h" PSI_memory_key key_memory_Filesort_buffer_sort_keys; @@ -58,7 +58,6 @@ const LEX_CSTRING filesort_names[]= Cost of the operation. */ -static double get_qsort_sort_cost(ha_rows num_rows, bool with_addon_fields) { const double row_copy_cost= with_addon_fields ? 
DEFAULT_ROW_COPY_COST : @@ -106,12 +105,13 @@ double get_pq_sort_cost(size_t num_rows, size_t queue_size, static double get_merge_cost(ha_rows num_elements, ha_rows num_buffers, - size_t elem_size, double compare_cost) + size_t elem_size, double compare_cost, + double disk_read_cost) { /* 2 -> 1 read + 1 write */ const double io_cost= (2.0 * (num_elements * elem_size + DISK_CHUNK_SIZE - 1) / - DISK_CHUNK_SIZE); + DISK_CHUNK_SIZE) * disk_read_cost; /* 2 -> 1 insert, 1 pop for the priority queue used to merge the buffers. */ const double cpu_cost= (2.0 * num_elements * log2(1.0 + num_buffers) * compare_cost) * PQ_SORT_SLOWNESS_CORRECTION_FACTOR; @@ -131,6 +131,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows, ha_rows num_keys_per_buffer, size_t elem_size, double key_compare_cost, + double disk_read_cost, bool with_addon_fields) { DBUG_ASSERT(num_keys_per_buffer != 0); @@ -162,7 +163,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows, total_cost+= num_merge_calls * get_merge_cost(num_keys_per_buffer * MERGEBUFF, MERGEBUFF, elem_size, - key_compare_cost); + key_compare_cost, disk_read_cost); // # of records in remaining buffers. last_n_elems+= num_remaining_buffs * num_keys_per_buffer; @@ -170,7 +171,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows, // Cost of merge sort of remaining buffers. total_cost+= get_merge_cost(last_n_elems, 1 + num_remaining_buffs, elem_size, - key_compare_cost); + key_compare_cost, disk_read_cost); num_buffers= num_merge_calls; num_keys_per_buffer*= MERGEBUFF; @@ -179,7 +180,7 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows, // Simulate final merge_buff call. 
last_n_elems+= num_keys_per_buffer * num_buffers; total_cost+= get_merge_cost(last_n_elems, 1 + num_buffers, elem_size, - key_compare_cost); + key_compare_cost, disk_read_cost); return total_cost; } @@ -238,7 +239,7 @@ void Sort_costs::compute_pq_sort_costs(Sort_param *param, ha_rows num_rows, { costs[PQ_SORT_ORDER_BY_FIELDS]= get_pq_sort_cost(num_rows, queue_size, false) + - param->sort_form->file->ha_rnd_pos_time(MY_MIN(queue_size - 1, num_rows)); + param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(queue_size - 1, num_rows)); } /* Calculate cost with addon fields */ @@ -272,9 +273,10 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param, costs[MERGE_SORT_ORDER_BY_FIELDS]= get_merge_many_buffs_cost_fast(num_rows, num_available_keys, row_length, DEFAULT_KEY_COMPARE_COST, + default_optimizer_costs.disk_read_cost, false) + - param->sort_form->file->ha_rnd_pos_time(MY_MIN(param->limit_rows, - num_rows)); + param->sort_form->file->ha_rnd_pos_call_time(MY_MIN(param->limit_rows, + num_rows)); if (with_addon_fields) { @@ -286,6 +288,7 @@ void Sort_costs::compute_merge_sort_costs(Sort_param *param, costs[MERGE_SORT_ALL_FIELDS]= get_merge_many_buffs_cost_fast(num_rows, num_available_keys, row_length, DEFAULT_KEY_COMPARE_COST, + DISK_READ_COST_THD(thd), true); } diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h index b97fc4632c5..73aa2f76a18 100644 --- a/sql/filesort_utils.h +++ b/sql/filesort_utils.h @@ -352,6 +352,7 @@ extern const LEX_CSTRING filesort_names[]; double cost_of_filesort(TABLE *table, ORDER *order_by, ha_rows rows_to_read, ha_rows limit_rows, enum sort_type *used_sort_type); +double get_qsort_sort_cost(ha_rows num_rows, bool with_addon_fields); int compare_packed_sort_keys(void *sort_keys, unsigned char **a, unsigned char **b); qsort2_cmp get_packed_keys_compare_ptr(); diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 06388968948..f1d5fd7e4d3 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -9737,24 +9737,27 @@ uint 
ha_partition::get_biggest_used_partition(uint *part_index) time for scan */ -double ha_partition::scan_time() +IO_AND_CPU_COST ha_partition::scan_time() { - double scan_time= 0; + IO_AND_CPU_COST scan_time= {0,0}; uint i; DBUG_ENTER("ha_partition::scan_time"); for (i= bitmap_get_first_set(&m_part_info->read_partitions); i < m_tot_parts; i= bitmap_get_next_set(&m_part_info->read_partitions, i)) - scan_time+= m_file[i]->scan_time(); + { + IO_AND_CPU_COST cost= m_file[i]->scan_time(); + scan_time.io+= cost.io; + scan_time.cpu+= cost.cpu; + } if (m_tot_parts) { /* Add TABLE_SCAN_SETUP_COST for partitions to make cost similar to in ha_scan_time() */ - scan_time+= (TABLE_SCAN_SETUP_COST * avg_io_cost() * (m_tot_parts - 1) / - optimizer_cache_cost); + scan_time.cpu+= TABLE_SCAN_SETUP_COST * (m_tot_parts - 1); } DBUG_RETURN(scan_time); } @@ -9769,34 +9772,78 @@ double ha_partition::scan_time() @return time for scanning index inx */ -double ha_partition::key_scan_time(uint inx) +IO_AND_CPU_COST ha_partition::key_scan_time(uint inx, ha_rows rows) { - double scan_time= 0; + IO_AND_CPU_COST scan_time= {0,0}; uint i; + uint partitions= bitmap_bits_set(&m_part_info->read_partitions); + ha_rows rows_per_part; DBUG_ENTER("ha_partition::key_scan_time"); + + if (partitions == 0) + DBUG_RETURN(scan_time); + set_if_bigger(rows, 1); + rows_per_part= (rows + partitions - 1)/partitions; + for (i= bitmap_get_first_set(&m_part_info->read_partitions); i < m_tot_parts; i= bitmap_get_next_set(&m_part_info->read_partitions, i)) - scan_time+= m_file[i]->key_scan_time(inx); + { + IO_AND_CPU_COST cost= m_file[i]->key_scan_time(inx, rows_per_part); + scan_time.io+= cost.io; + scan_time.cpu+= cost.cpu; + } DBUG_RETURN(scan_time); } -double ha_partition::keyread_time(uint inx, uint ranges, ha_rows rows) +IO_AND_CPU_COST ha_partition::keyread_time(uint inx, ulong ranges, ha_rows rows, + ulonglong blocks) { - double read_time= 0; + IO_AND_CPU_COST read_time= {0,0}; uint i; + uint partitions= 
bitmap_bits_set(&m_part_info->read_partitions); DBUG_ENTER("ha_partition::keyread_time"); - if (!ranges) - DBUG_RETURN(handler::keyread_time(inx, ranges, rows)); + if (partitions == 0) + DBUG_RETURN(read_time); + + ha_rows rows_per_part= (rows + partitions - 1)/partitions; for (i= bitmap_get_first_set(&m_part_info->read_partitions); i < m_tot_parts; i= bitmap_get_next_set(&m_part_info->read_partitions, i)) - read_time+= m_file[i]->keyread_time(inx, ranges, rows); + { + IO_AND_CPU_COST cost= m_file[i]->keyread_time(inx, ranges, rows_per_part, + blocks); + read_time.io+= cost.io; + read_time.cpu+= cost.cpu; + } + /* Add that we have to do a key lookup for all ranges in all partitions */ + read_time.cpu= (partitions-1) * ranges * KEY_LOOKUP_COST; DBUG_RETURN(read_time); } +IO_AND_CPU_COST ha_partition::rnd_pos_time(ha_rows rows) +{ + IO_AND_CPU_COST read_time= {0,0}; + uint i; + uint partitions= bitmap_bits_set(&m_part_info->read_partitions); + if (partitions == 0) + return read_time; + + ha_rows rows_per_part= (rows + partitions - 1)/partitions; + for (i= bitmap_get_first_set(&m_part_info->read_partitions); + i < m_tot_parts; + i= bitmap_get_next_set(&m_part_info->read_partitions, i)) + { + IO_AND_CPU_COST cost= m_file[i]->rnd_pos_time(rows_per_part); + read_time.io+= cost.io; + read_time.cpu+= cost.cpu; + } + return read_time; +} + + /** Find number of records in a range. 
@param inx Index number @@ -9853,6 +9900,8 @@ ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key, if (estimated_rows && checked_rows && checked_rows >= min_rows_to_check) { + /* We cannot use page ranges when there is more than one partion */ + *pages= unused_page_range; DBUG_PRINT("info", ("records_in_range(inx %u): %lu (%lu * %lu / %lu)", inx, @@ -9866,6 +9915,8 @@ ha_rows ha_partition::records_in_range(uint inx, const key_range *min_key, DBUG_PRINT("info", ("records_in_range(inx %u): %lu", inx, (ulong) estimated_rows)); + /* We cannot use page ranges when there is more than one partion */ + *pages= unused_page_range; DBUG_RETURN(estimated_rows); } @@ -9896,33 +9947,6 @@ ha_rows ha_partition::estimate_rows_upper_bound() } -/* - Get time to read - - SYNOPSIS - read_time() - index Index number used - ranges Number of ranges - rows Number of rows - - RETURN VALUE - time for read - - DESCRIPTION - This will be optimised later to include whether or not the index can - be used with partitioning. To achieve we need to add another parameter - that specifies how many of the index fields that are bound in the ranges. - Possibly added as a new call to handlers. -*/ - -double ha_partition::read_time(uint index, uint ranges, ha_rows rows) -{ - DBUG_ENTER("ha_partition::read_time"); - - DBUG_RETURN(get_open_file_sample()->read_time(index, ranges, rows)); -} - - /** Number of rows in table. see handler.h @@ -12168,6 +12192,18 @@ ha_partition::can_convert_blob(const Field_blob* field, return true; } +/* + Get table costs for the current statement that should be stored in + handler->cost variables. + + When we want to support many different table handlers, we should set + m_file[i]->costs to point to an unique cost structure per open + instance and call something similar as + TABLE_SHARE::update_optimizer_costs(handlerton *hton) and + handler::update_optimizer_costs(&costs) on it. 
+*/ + + void ha_partition::set_optimizer_costs(THD *thd) { handler::set_optimizer_costs(thd); @@ -12177,6 +12213,17 @@ void ha_partition::set_optimizer_costs(THD *thd) m_file[i]->set_optimizer_costs(thd); } +/* + Get unique table costs for the first instance of the handler and store + in table->share +*/ + +void ha_partition::update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + uint i= bitmap_get_first_set(&m_part_info->read_partitions); + m_file[i]->update_optimizer_costs(costs); +} + struct st_mysql_storage_engine partition_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; diff --git a/sql/ha_partition.h b/sql/ha_partition.h index 985d6d1eaec..9708b5fac86 100644 --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -1031,17 +1031,15 @@ public: /* Called in test_quick_select to determine if indexes should be used. */ - double scan_time() override; + IO_AND_CPU_COST scan_time() override; - double key_scan_time(uint inx) override; + IO_AND_CPU_COST key_scan_time(uint inx, ha_rows rows) override; - double keyread_time(uint inx, uint ranges, ha_rows rows) override; + IO_AND_CPU_COST keyread_time(uint inx, ulong ranges, ha_rows rows, + ulonglong blocks) override; + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override; /* - The next method will never be called if you do not implement indexes. - */ - double read_time(uint index, uint ranges, ha_rows rows) override; - /* For the given range how many records are estimated to be in this range. Used by optimiser to calculate cost of using a particular index. 
*/ @@ -1645,5 +1643,6 @@ public: const Field_blob* field, const Column_definition& new_field) const override; void set_optimizer_costs(THD *thd); + void update_optimizer_costs(OPTIMIZER_COSTS *costs); }; #endif /* HA_PARTITION_INCLUDED */ diff --git a/sql/handler.cc b/sql/handler.cc index adb923eed0a..6179496a88e 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -46,6 +46,7 @@ #include "ha_sequence.h" #include "rowid_filter.h" #include "mysys_err.h" +#include "optimizer_defaults.h" #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" @@ -621,8 +622,44 @@ int ha_finalize_handlerton(st_plugin_int *plugin) } -const char *hton_no_exts[]= { 0 }; +/* + Get a pointer to the global engine optimizer costs (like + innodb.disk_read_cost) and store the pointer in the handlerton. + + This is called once when a handlerton is created. + We also update the not set global costs with the default costs + to allow information_schema to print the real used values. +*/ + +static bool update_optimizer_costs(handlerton *hton) +{ + OPTIMIZER_COSTS costs= default_optimizer_costs; + LEX_CSTRING *name= hton_name(hton); + + if (hton->update_optimizer_costs) + hton->update_optimizer_costs(&costs); + + mysql_mutex_lock(&LOCK_optimizer_costs); + hton->optimizer_costs= get_or_create_optimizer_costs(name->str, + name->length); + if (!hton->optimizer_costs) + { + mysql_mutex_unlock(&LOCK_optimizer_costs); + return 1; // OOM + } + + /* Update not set values from current default costs */ + for (uint i=0 ; i < sizeof(OPTIMIZER_COSTS)/sizeof(double) ; i++) + { + double *var= ((double*) hton->optimizer_costs)+i; + if (*var == OPTIMIZER_COST_UNDEF) + *var= ((double*) &costs)[i]; + } + mysql_mutex_unlock(&LOCK_optimizer_costs); + return 0; +} +const char *hton_no_exts[]= { 0 }; int ha_initialize_handlerton(st_plugin_int *plugin) { @@ -725,6 +762,12 @@ int ha_initialize_handlerton(st_plugin_int *plugin) hton->savepoint_offset= savepoint_alloc_size; savepoint_alloc_size+= tmp; 
hton2plugin[hton->slot]=plugin; + + if (plugin->plugin->type == MYSQL_STORAGE_ENGINE_PLUGIN && + !(hton->flags & HTON_HIDDEN) && + update_optimizer_costs(hton)) + goto err_deinit; + if (hton->prepare) { total_ha_2pc++; @@ -764,7 +807,6 @@ int ha_initialize_handlerton(st_plugin_int *plugin) resolve_sysvar_table_options(hton); update_discovery_counters(hton, 1); - DBUG_RETURN(0); err_deinit: @@ -3222,58 +3264,97 @@ LEX_CSTRING *handler::engine_name() return hton_name(ht); } - /* - It is assumed that the value of the parameter 'ranges' can be only 0 or 1. - If ranges == 1 then the function returns the cost of index only scan - by index 'keyno' of one range containing 'rows' key entries. - If ranges == 0 then the function returns only the cost of copying - those key entries into the engine buffers. - - This function doesn't take in account into copying the key to record - (KEY_COPY_COST) or comparing the key to the where clause (WHERE_COST) + Calculate cost for an index scan for given index and number of records. + + @param index Index to use + @param ranges Number of ranges (b-tree dives in case of b-tree). + Used by partition engine + @param rows Number of expected rows + @param blocks Number of disk blocks to read (from range optimizer). + 0 if not known + + This function does not take in account into looking up the key, + copying the key to record and finding the next key. 
These cost are + handled in ha_keyread_time() */ -double handler::keyread_time(uint index, uint ranges, ha_rows rows) +IO_AND_CPU_COST handler::keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) { - size_t len; - double cost; - DBUG_ASSERT(ranges == 0 || ranges == 1); - len= table->key_info[index].key_length + ref_length; - if (table->file->is_clustering_key(index)) - len= table->s->stored_rec_length; + IO_AND_CPU_COST cost; + ulonglong io_blocks= 0; + DBUG_ASSERT(ranges > 0); - cost= ((double)rows*len/(stats.block_size+1) * - INDEX_BLOCK_COPY_COST(table->in_use)); - /* - We divide the cost with optimizer_cache_cost as ha_keyread_time() - and ha_key_scan_time() will multiply the result value with - optimizer_cache_cost and we want to keep the above 'memory operation' - cost unaffected by this multiplication. - */ - cost/= optimizer_cache_cost; - if (ranges) + /* memory engine has stats.block_size == 0 */ + if (stats.block_size) { - uint keys_per_block= (uint) (stats.block_size*3/4/len+1); - /* - We let the cost grow slowly in proportion to number of rows to - promote indexes with less rows. - We do not calculate exact number of block reads as then index - only reads will be more costly than normal reads, especially - compared to InnoDB clustered keys. - - KEY_LOOKUP_COST is the cost of finding the first key in the - range. 
Finding the next key is usually a fast operation so we - don't count it here, it is taken into account in - ha_keyread_and_copy_time() - */ - cost+= (((double) (rows / keys_per_block) + KEY_LOOKUP_COST) * - avg_io_cost()); + if (!blocks) + { + /* Estimate length of index data */ + if (rows <= 1) // EQ_REF optimization + { + blocks= 1; + io_blocks= (stats.block_size + IO_SIZE - 1)/ IO_SIZE; + } + else + { + size_t len= table->key_storage_length(index); + blocks= ((ulonglong) ((rows * len / INDEX_BLOCK_FILL_FACTOR_DIV * + INDEX_BLOCK_FILL_FACTOR_MUL + + stats.block_size-1)) / stats.block_size + + (ranges - 1)); + io_blocks= blocks * stats.block_size / IO_SIZE; + } + } + else + io_blocks= blocks * stats.block_size / IO_SIZE; } + cost.io= (double) io_blocks * avg_io_cost(); + cost.cpu= blocks * INDEX_BLOCK_COPY_COST; return cost; } +/* + Cost of doing a set of range scans and finding the key position. + This function is used both with index scans (in which case there should be + an additional KEY_COPY_COST) and when normal index + fetch row scan, + in which case there should an additional rnd_pos_time() cost. 
+*/ + +double handler::ha_keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) +{ + if (rows < ranges) + rows= ranges; + IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks); + return (cost.io * DISK_READ_RATIO + + cost.cpu + ranges * KEY_LOOKUP_COST + + (rows - ranges) * KEY_NEXT_FIND_COST); +} + + +/* + Read a row from a clustered index + + Cost is similar to ha_rnd_pos_call_time() as a index_read() on a clusterd + key has identical code as rnd_pos() (At least in InnoDB:) +*/ + +double handler::ha_keyread_clustered_and_copy_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) +{ + if (rows < ranges) + rows= ranges; + IO_AND_CPU_COST cost= keyread_time(index, ranges, rows, blocks); + return (cost.io * DISK_READ_RATIO + + cost.cpu + ranges * ROW_LOOKUP_COST + + (rows - ranges) * ROW_NEXT_FIND_COST + + rows * ROW_COPY_COST); +} + THD *handler::ha_thd(void) const { DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd); @@ -3346,7 +3427,7 @@ int handler::ha_open(TABLE *table_arg, const char *name, int mode, name, ht->db_type, table_arg->db_stat, mode, test_if_locked)); - table= table_arg; + set_table(table_arg); DBUG_ASSERT(table->s == table_share); DBUG_ASSERT(m_lock_type == F_UNLCK); DBUG_PRINT("info", ("old m_lock_type: %d F_UNLCK %d", m_lock_type, F_UNLCK)); @@ -3396,14 +3477,15 @@ int handler::ha_open(TABLE *table_arg, const char *name, int mode, else dup_ref=ref+ALIGN_SIZE(ref_length); cached_table_flags= table_flags(); - + if (!table->s->optimizer_costs_inited) + { + table->s->optimizer_costs_inited=1; + /* Copy data from global 'engine'.optimizer_costs to TABLE_SHARE */ + table->s->update_optimizer_costs(partition_ht()); + /* Update costs depend on table structure */ + update_optimizer_costs(&table->s->optimizer_costs); + } /* Copy current optimizer costs. 
Needed in case clone() is used */ - set_optimizer_costs(table->in_use); - DBUG_ASSERT(optimizer_key_copy_cost >= 0.0); - DBUG_ASSERT(optimizer_key_next_find_cost >= 0.0); - DBUG_ASSERT(optimizer_row_copy_cost >= 0.0); - DBUG_ASSERT(optimizer_where_cost >= 0.0); - DBUG_ASSERT(optimizer_key_cmp_cost >= 0.0); reset_statistics(); } internal_tmp_table= MY_TEST(test_if_locked & HA_OPEN_INTERNAL_TABLE); @@ -3435,6 +3517,15 @@ int handler::ha_close(void) DBUG_RETURN(close()); } +void handler::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) +{ + DBUG_ASSERT(table_arg->s == share); + table= table_arg; + table_share= share; + costs= &share->optimizer_costs; + reset_statistics(); +} + int handler::ha_rnd_next(uchar *buf) { @@ -8744,27 +8835,19 @@ Table_scope_and_contents_source_st::fix_period_fields(THD *thd, } /* - Copy common optimizer cost variables to the engine - - This is needed to provide fast acccess to these variables during - optimization (as we refer to them multiple times). + Copy upper level cost to the engine as part of start statement - The other option would be to access them from thd, but that - would require a function call (as we cannot access THD from - an inline handler function) and two extra memory accesses - for each variable. + This is needed to provide fast access to these variables during + optimization (as we refer to them multiple times during one query). - index_block_copy_cost is not copied as it is used so seldom. + The other option would be to access them from THD, but that would + require a function call (as we cannot easily access THD from an + inline handler function) and two extra memory accesses for each + variable. 
*/ - void handler::set_optimizer_costs(THD *thd) { - optimizer_key_copy_cost= thd->variables.optimizer_key_copy_cost; - optimizer_key_next_find_cost= - thd->variables.optimizer_key_next_find_cost; - optimizer_row_copy_cost= thd->variables.optimizer_row_copy_cost; - optimizer_where_cost= thd->variables.optimizer_where_cost; - optimizer_key_cmp_cost= thd->variables.optimizer_key_cmp_cost; - set_optimizer_cache_cost(thd->optimizer_cache_hit_ratio); + optimizer_where_cost= thd->variables.optimizer_where_cost; + optimizer_scan_setup_cost= thd->variables.optimizer_scan_setup_cost; } diff --git a/sql/handler.h b/sql/handler.h index a5d4248653a..5cdf55a9641 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -26,9 +26,9 @@ #endif #include "sql_const.h" -#include "optimizer_costs.h" #include "sql_basic_types.h" #include "mysqld.h" /* server_id */ +#include "optimizer_costs.h" #include "sql_plugin.h" /* plugin_ref, st_plugin_int, plugin */ #include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA */ #include "sql_cache.h" @@ -36,6 +36,7 @@ #include "sql_array.h" /* Dynamic_array<> */ #include "mdl.h" #include "vers_string.h" +#include "optimizer_costs.h" #include "sql_analyze_stmt.h" // for Exec_time_tracker @@ -1046,6 +1047,7 @@ enum enum_schema_tables SCH_KEY_CACHES, SCH_KEY_COLUMN_USAGE, SCH_OPEN_TABLES, + SCH_OPTIMIZER_COSTS, SCH_OPT_TRACE, SCH_PARAMETERS, SCH_PARTITIONS, @@ -1496,6 +1498,10 @@ struct handlerton /* Called for all storage handlers after ddl recovery is done */ void (*signal_ddl_recovery_done)(handlerton *hton); + /* Called at startup to update default engine costs */ + void (*update_optimizer_costs)(OPTIMIZER_COSTS *costs); + void *optimizer_costs; /* Costs are stored here */ + /* Optional clauses in the CREATE/ALTER TABLE */ @@ -3080,6 +3086,21 @@ enum class Compare_keys : uint32_t NotEqual }; +/* Cost for reading a row through an index */ +struct INDEX_READ_COST +{ + double read_cost; + double index_only_cost; +}; + +/* Separated costs for IO and CPU. 
For handler::keyread_time() */ +struct IO_AND_CPU_COST +{ + double io; + double cpu; +}; + + /** The handler class is the interface for dynamically loadable storage engines. Do not add ifdefs and take care when adding or @@ -3140,9 +3161,10 @@ protected: ha_rows estimation_rows_to_insert; handler *lookup_handler; public: - handlerton *ht; /* storage engine of this handler */ - uchar *ref; /* Pointer to current row */ - uchar *dup_ref; /* Pointer to duplicate row */ + handlerton *ht; /* storage engine of this handler */ + OPTIMIZER_COSTS *costs; /* Points to table->share->costs */ + uchar *ref; /* Pointer to current row */ + uchar *dup_ref; /* Pointer to duplicate row */ uchar *lookup_buffer; ha_statistics stats; @@ -3215,15 +3237,6 @@ public: ulonglong rows_changed; /* One bigger than needed to avoid to test if key == MAX_KEY */ ulonglong index_rows_read[MAX_KEY+1]; - /* - Cost of using key/record cache: (100-cache_hit_ratio)/100 - Updated from THD in open_tables() - */ - double optimizer_cache_cost; - double optimizer_key_next_find_cost; - double optimizer_row_copy_cost, optimizer_key_copy_cost; - double optimizer_where_cost, optimizer_key_cmp_cost; - ha_copy_info copy_info; private: @@ -3342,13 +3355,15 @@ private: For non partitioned handlers this is &TABLE_SHARE::ha_share. */ Handler_share **ha_share; + double optimizer_where_cost; // Copy of THD->...optimzer_where_cost + double optimizer_scan_setup_cost; // Copy of THD->...optimzer_scan_... 
public: handler(handlerton *ht_arg, TABLE_SHARE *share_arg) :table_share(share_arg), table(0), estimation_rows_to_insert(0), lookup_handler(this), - ht(ht_arg), ref(0), lookup_buffer(NULL), end_range(NULL), + ht(ht_arg), costs(0), ref(0), lookup_buffer(NULL), end_range(NULL), implicit_emptied(0), mark_trx_read_write_done(0), check_table_binlog_row_based_done(0), @@ -3359,7 +3374,6 @@ public: ref_length(sizeof(my_off_t)), ft_handler(0), inited(NONE), pre_inited(NONE), pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0), - optimizer_cache_cost((100-DEFAULT_CACHE_HIT_RATIO)/100.0), tracker(NULL), pushed_idx_cond(NULL), pushed_idx_cond_keyno(MAX_KEY), @@ -3373,12 +3387,19 @@ public: m_psi_numrows(0), m_psi_locker(NULL), row_logging(0), row_logging_init(0), - m_lock_type(F_UNLCK), ha_share(NULL) + m_lock_type(F_UNLCK), ha_share(NULL), optimizer_where_cost(0), + optimizer_scan_setup_cost(0) { DBUG_PRINT("info", ("handler created F_UNLCK %d F_RDLCK %d F_WRLCK %d", F_UNLCK, F_RDLCK, F_WRLCK)); reset_statistics(); + /* + The following variables should be updated in set_optimizer_costs() + which is to be run as part of setting up the table for the query + */ + MEM_UNDEFINED(&optimizer_where_cost, sizeof(optimizer_where_cost)); + MEM_UNDEFINED(&optimizer_scan_setup_cost, sizeof(optimizer_scan_setup_cost)); } virtual ~handler(void) { @@ -3579,22 +3600,22 @@ public: bzero(©_info, sizeof(copy_info)); reset_copy_info(); } - virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) - { - table= table_arg; - table_share= share; - reset_statistics(); - } + virtual void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share); /* Time for a full table data scan. To be overrided by engines, should not be used by the sql level. 
*/ protected: - virtual double scan_time() + virtual IO_AND_CPU_COST scan_time() { - return (((ulonglong2double(stats.data_file_length) / stats.block_size)) * - avg_io_cost()); + IO_AND_CPU_COST cost; + ulonglong length= stats.data_file_length; + cost.io= (double) (length / IO_SIZE) * avg_io_cost(); + cost.cpu= (!stats.block_size ? 0.0 : + (double) ((length + stats.block_size-1)/stats.block_size) * + INDEX_BLOCK_COPY_COST); + return cost; } public: @@ -3610,147 +3631,149 @@ public: a few rows and the extra cost has no practical effect. */ - inline double ha_scan_time() + inline double ha_scan_time(ha_rows rows) { - return (scan_time() * optimizer_cache_cost + - TABLE_SCAN_SETUP_COST * avg_io_cost()); + IO_AND_CPU_COST cost= scan_time(); + return (cost.io * DISK_READ_RATIO + + cost.cpu + TABLE_SCAN_SETUP_COST + + (double) rows * (ROW_NEXT_FIND_COST + ROW_COPY_COST)); } /* - Time for a full table scan, fetching the rows from the table and comparing - the row with the where clause + Time for a full table scan, fetching the rows from the table and comparing + the row with the where clause */ - inline double ha_scan_and_compare_time(ha_rows records) + inline double ha_scan_and_compare_time(ha_rows rows) { - return (ha_scan_time() + - (double) records * (ROW_COPY_COST + WHERE_COST)); + return ha_scan_time(rows) + (double) rows * WHERE_COST; } + /* Cost of (random) reading a block of IO_SIZE */ virtual double avg_io_cost() { - return 1.0; + return DISK_READ_COST; } - virtual void set_optimizer_costs(THD *thd); - /* - Set cost for finding a row in the engine cache - This allows the handler to override the cost if there is no - caching of rows, like in heap or federatedx. + Update table->share optimizer costs for this particular table. + Called once when table is opened the first time. */ - virtual void set_optimizer_cache_cost(double cost) - { - optimizer_cache_cost= cost; - } - - /** - The cost of reading a set of ranges from the table using an index - to access it. 
- - @param index The index number. - @param ranges The number of ranges to be read. If 0, it means that - we calculate separately the cost of reading the key. - @param rows Total number of rows to be read. - - This method can be used to calculate the total cost of scanning a table - using an index by calling it using read_time(index, 1, table_size). + virtual void update_optimizer_costs(OPTIMIZER_COSTS *costs) {} - This function is to be reimplemented by engines (if needed). The sql_level - should call ha_read_time(), ha_read_and_copy_time() or - ha_read_and_compare_time(). + /* + Set handler optimizer cost variables. + Called for each table used by the statment + This is virtual mainly for the partition engine. */ + virtual void set_optimizer_costs(THD *thd); + protected: - virtual double read_time(uint index, uint ranges, ha_rows rows) + /* + Cost of reading 'rows' number of rows with a rowid + */ + virtual IO_AND_CPU_COST rnd_pos_time(ha_rows rows) { - return ((rows2double(rows) * ROW_LOOKUP_COST + - rows2double(ranges) * KEY_LOOKUP_COST) * avg_io_cost()); + double r= rows2double(rows); + return + { + r * avg_io_cost() * stats.block_size/IO_SIZE, // Blocks read + r * INDEX_BLOCK_COPY_COST // Copy block from cache + }; } public: - /* Same as above, but take into account CACHE_COST */ - inline double ha_read_time(uint index, uint ranges, ha_rows rows) - { - return read_time(index, ranges, rows) * optimizer_cache_cost; - } + /* + Time for doing and internal rnd_pos() inside the engine. For some + engine, this is more efficient than the SQL layer calling + rnd_pos() as there is no overhead in converting/checking the + rnd_pos_value. This is used when calculating the cost of fetching + a key+row in one go (like when scanning an index and fetching the + row). 
+ */ - /* Same as above, but take into account also copying of the row to 'record' */ - inline double ha_read_and_copy_time(uint index, uint ranges, ha_rows rows) + inline double ha_rnd_pos_time(ha_rows rows) { - return (ha_read_time(index, ranges, rows) + - rows2double(rows) * ROW_COPY_COST); + IO_AND_CPU_COST cost= rnd_pos_time(rows); + return (cost.io * DISK_READ_RATIO + + cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST)); } - /* Same as above, but take into account also copying and comparing the row */ - inline double ha_read_and_compare_time(uint index, uint ranges, ha_rows rows) + /* + This cost if when we are calling rnd_pos() explict in the call + For the moment this function is identical to ha_rnd_pos time, + but that may change in the future after we do more cost checks for + more engines. + */ + inline double ha_rnd_pos_call_time(ha_rows rows) { - return (ha_read_time(index, ranges, rows) + - rows2double(rows) * (ROW_COPY_COST + WHERE_COST)); + IO_AND_CPU_COST cost= rnd_pos_time(rows); + return (cost.io * DISK_READ_RATIO + + cost.cpu + rows2double(rows) * (ROW_LOOKUP_COST + ROW_COPY_COST)); } - /* Cost of reading a row with rowid */ -protected: - virtual double rnd_pos_time(ha_rows rows) + inline double ha_rnd_pos_call_and_compare_time(ha_rows rows) { - return rows2double(rows) * ROW_LOOKUP_COST * avg_io_cost(); - } -public: - /* - Same as above, but take into account cache_cost and copying of the row - to 'record'. - Note that this should normally be same as ha_read_time(some_key, 0, rows) - */ - inline double ha_rnd_pos_time(ha_rows rows) - { - return (rnd_pos_time(rows) * optimizer_cache_cost + - rows2double(rows) * ROW_COPY_COST); + return (ha_rnd_pos_call_time(rows) + rows2double(rows) * WHERE_COST); } /** - Calculate cost of 'index_only' scan for given index and number of records. 
- - @param index Index to read - @param flag If flag == 1 then the function returns the cost of - index only scan by index 'index' of one range containing - 'rows' key entries. - If flag == 0 then function returns only the cost of copying - those key entries into the engine buffers. - @param rows #of records to read + Calculate cost of 'index_only' scan for given index, a number of reanges + and number of records. + + @param index Index to read + @param rows #of records to read + @param blocks Number of IO blocks that needs to be accessed. + 0 if not known (in which case it's calculated) */ protected: - virtual double keyread_time(uint index, uint flag, ha_rows rows); + virtual IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks); public: /* Calculate cost of 'keyread' scan for given index and number of records including fetching the key to the 'record' buffer. */ + double ha_keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks); - inline double ha_keyread_time(uint index, uint flag, ha_rows rows) + /* Same as above, but take into account copying the key the the SQL layer */ + inline double ha_keyread_and_copy_time(uint index, ulong ranges, + ha_rows rows, ulonglong blocks) { - return (keyread_time(index, flag, rows) * optimizer_cache_cost); + return (ha_keyread_time(index, ranges, rows, blocks) + + (double) rows * KEY_COPY_COST); } - /* Same as above, but take into account copying the key the the SQL layer */ - inline double ha_keyread_and_copy_time(uint index, uint flag, ha_rows rows) + inline double ha_keyread_and_compare_time(uint index, ulong ranges, + ha_rows rows, ulonglong blocks) { - return ha_keyread_time(index, flag, rows) + (double) rows * KEY_COPY_COST; + return (ha_keyread_time(index, ranges, rows, blocks) + + (double) rows * (KEY_COPY_COST + WHERE_COST)); } + double ha_keyread_clustered_and_copy_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks); /* Time for a full table index 
scan (without copy or compare cost). To be overrided by engines, sql level should use ha_key_scan_time(). + Note that IO_AND_CPU_COST does not include avg_io_cost() ! */ protected: - virtual double key_scan_time(uint index) + virtual IO_AND_CPU_COST key_scan_time(uint index, ha_rows rows) { - return keyread_time(index, 1, records()); + return keyread_time(index, 1, MY_MAX(rows, 1), 0); } public: /* Cost of doing a full index scan */ - inline double ha_key_scan_time(uint index) + inline double ha_key_scan_time(uint index, ha_rows rows) { - return (key_scan_time(index) * optimizer_cache_cost); + IO_AND_CPU_COST cost= key_scan_time(index, rows); + return (cost.io * DISK_READ_RATIO + + cost.cpu + INDEX_SCAN_SETUP_COST + KEY_LOOKUP_COST + + (double) rows * (KEY_NEXT_FIND_COST + KEY_COPY_COST)); } /* @@ -3759,8 +3782,7 @@ public: */ inline double ha_key_scan_and_compare_time(uint index, ha_rows rows) { - return (ha_key_scan_time(index) + - (double) rows * (KEY_COPY_COST + WHERE_COST)); + return ha_key_scan_time(index, rows) + (double) rows * WHERE_COST; } virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } @@ -5213,7 +5235,7 @@ public: ha_share= arg_ha_share; return false; } - void set_table(TABLE* table_arg) { table= table_arg; } + inline void set_table(TABLE* table_arg); int get_lock_type() const { return m_lock_type; } public: /* XXX to be removed, see ha_partition::partition_ht() */ @@ -5297,7 +5319,7 @@ protected: void unlock_shared_ha_data(); /* - Mroonga needs to call read_time() directly for it's internal handler + Mroonga needs to call some xxx_time() directly for it's internal handler methods */ friend class ha_mroonga; diff --git a/sql/item_func.cc b/sql/item_func.cc index a07595cbbd8..2f110406a6d 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -5920,6 +5920,7 @@ bool Item_func_get_system_var::fix_length_and_dec(THD *thd) decimals=0; break; case SHOW_DOUBLE: + case SHOW_OPTIMIZER_COST: decimals= 6; collation= 
DTCollation_numeric(); fix_char_length(DBL_DIG + 6); @@ -5977,6 +5978,7 @@ const Type_handler *Item_func_get_system_var::type_handler() const case SHOW_CHAR_PTR: case SHOW_LEX_STRING: return &type_handler_varchar; + case SHOW_OPTIMIZER_COST: case SHOW_DOUBLE: return &type_handler_double; default: diff --git a/sql/json_table.cc b/sql/json_table.cc index 05ee83bd3d8..949175d8027 100644 --- a/sql/json_table.cc +++ b/sql/json_table.cc @@ -54,6 +54,7 @@ public: bzero(&m_hton, sizeof(m_hton)); m_hton.tablefile_extensions= hton_no_exts; m_hton.slot= HA_SLOT_UNDEF; + m_hton.flags= HTON_HIDDEN; } }; @@ -245,6 +246,10 @@ public: int open(const char *name, int mode, uint test_if_locked) override { return 0; } int close(void) override { return 0; } + void update_optimizer_costs(OPTIMIZER_COSTS *costs) + { + memcpy(costs, &heap_optimizer_costs, sizeof(*costs)); + } int rnd_init(bool scan) override; int rnd_next(uchar *buf) override; int rnd_pos(uchar * buf, uchar *pos) override; diff --git a/sql/keycaches.cc b/sql/keycaches.cc index 10bec7c1de8..250a287e229 100644 --- a/sql/keycaches.cc +++ b/sql/keycaches.cc @@ -15,6 +15,10 @@ #include "mariadb.h" #include "keycaches.h" +#include "optimizer_costs.h" +#include "optimizer_defaults.h" +#include "handler.h" +#include "sql_class.h" /**************************************************************************** Named list handling @@ -22,10 +26,13 @@ NAMED_ILIST key_caches; NAMED_ILIST rpl_filters; +NAMED_ILIST linked_optimizer_costs; extern "C" PSI_memory_key key_memory_KEY_CACHE; extern PSI_memory_key key_memory_NAMED_ILINK_name; +LEX_CSTRING default_base= {STRING_WITH_LEN("default")}; + /** ilink (intrusive list element) with a name */ @@ -46,7 +53,7 @@ public: } inline bool cmp(const char *name_cmp, size_t length) { - return length == name_length && !memcmp(name, name_cmp, length); + return !system_charset_info->strnncoll(name, name_length, name_cmp, length); } ~NAMED_ILINK() { @@ -72,7 +79,8 @@ uchar* 
find_named(I_List<NAMED_ILINK> *list, const char *name, size_t length, } -bool NAMED_ILIST::delete_element(const char *name, size_t length, void (*free_element)(const char *name, void*)) +bool NAMED_ILIST::delete_element(const char *name, size_t length, + void (*free_element)(const char *name, void*)) { I_List_iterator<NAMED_ILINK> it(*this); NAMED_ILINK *element; @@ -104,14 +112,12 @@ void NAMED_ILIST::delete_elements(void (*free_element)(const char *name, void*)) /* Key cache functions */ -LEX_CSTRING default_key_cache_base= {STRING_WITH_LEN("default")}; - KEY_CACHE zero_key_cache; ///< @@nonexistent_cache.param->value_ptr() points here KEY_CACHE *get_key_cache(const LEX_CSTRING *cache_name) { if (!cache_name || ! cache_name->length) - cache_name= &default_key_cache_base; + cache_name= &default_base; return ((KEY_CACHE*) find_named(&key_caches, cache_name->str, cache_name->length, 0)); } @@ -234,3 +240,128 @@ void free_all_rpl_filters() { rpl_filters.delete_elements(free_rpl_filter); } + + +/****************************************************************************** + Optimizer costs functions +******************************************************************************/ + +LEX_CSTRING default_costs_base= {STRING_WITH_LEN("default")}; + +OPTIMIZER_COSTS default_optimizer_costs= +{ + DEFAULT_DISK_READ_COST, // disk_read_cost + DEFAULT_INDEX_BLOCK_COPY_COST, // index_block_copy_cost + DEFAULT_WHERE_COST/4, // key_cmp_cost + DEFAULT_KEY_COPY_COST, // key_copy_cost + DEFAULT_KEY_LOOKUP_COST, // key_lookup_cost + DEFAULT_KEY_NEXT_FIND_COST, // key_next_find_cost + DEFAULT_DISK_READ_RATIO, // disk_read_ratio + DEFAULT_ROW_COPY_COST, // row_copy_cost + DEFAULT_ROW_LOOKUP_COST, // row_lookup_cost + DEFAULT_ROW_NEXT_FIND_COST, // row_next_find_cost + DEFAULT_ROWID_COMPARE_COST, // rowid_compare_cost + DEFAULT_ROWID_COPY_COST, // rowid_copy_cost + 1 // Cannot be deleted +}; + +OPTIMIZER_COSTS heap_optimizer_costs, tmp_table_optimizer_costs; + +OPTIMIZER_COSTS 
*get_optimizer_costs(const LEX_CSTRING *cache_name) +{ + if (!cache_name->length) + return &default_optimizer_costs; + return ((OPTIMIZER_COSTS*) find_named(&linked_optimizer_costs, + cache_name->str, cache_name->length, + 0)); +} + +OPTIMIZER_COSTS *create_optimizer_costs(const char *name, size_t length) +{ + OPTIMIZER_COSTS *optimizer_costs; + DBUG_ENTER("create_optimizer_costs"); + DBUG_PRINT("enter",("name: %.*s", (int) length, name)); + + if ((optimizer_costs= (OPTIMIZER_COSTS*) + my_malloc(key_memory_KEY_CACHE, + sizeof(OPTIMIZER_COSTS), MYF(MY_ZEROFILL | MY_WME)))) + { + if (!new NAMED_ILINK(&linked_optimizer_costs, name, length, + (uchar*) optimizer_costs)) + { + my_free(optimizer_costs); + optimizer_costs= 0; + } + else + { + /* Mark that values are not yet set */ + for (uint i=0 ; i < sizeof(OPTIMIZER_COSTS)/sizeof(double) ; i++) + ((double*) optimizer_costs)[i]= OPTIMIZER_COST_UNDEF; + } + } + DBUG_RETURN(optimizer_costs); +} + + +OPTIMIZER_COSTS *get_or_create_optimizer_costs(const char *name, size_t length) +{ + LEX_CSTRING optimizer_costs_name; + OPTIMIZER_COSTS *optimizer_costs; + + optimizer_costs_name.str= name; + optimizer_costs_name.length= length; + if (!(optimizer_costs= get_optimizer_costs(&optimizer_costs_name))) + optimizer_costs= create_optimizer_costs(name, length); + return optimizer_costs; +} + +extern "C" +{ +bool process_optimizer_costs(process_optimizer_costs_t func, TABLE *param) +{ + I_List_iterator<NAMED_ILINK> it(linked_optimizer_costs); + NAMED_ILINK *element; + int res= 0; + + while ((element= it++)) + { + LEX_CSTRING name= { element->name, element->name_length }; + OPTIMIZER_COSTS *costs= (OPTIMIZER_COSTS *) element->data; + res |= func(&name, costs, param); + } + return res != 0; +} +} + +bool create_default_optimizer_costs() +{ + return (new NAMED_ILINK(&linked_optimizer_costs, + default_base.str, default_base.length, + (uchar*) &default_optimizer_costs)) == 0; +} + + +/* + Make a copy of heap and tmp_table engine costs to be 
able to create + internal temporary tables without taking a mutex. +*/ + +void copy_tmptable_optimizer_costs() +{ + memcpy(&heap_optimizer_costs, heap_hton->optimizer_costs, + sizeof(heap_optimizer_costs)); + memcpy(&tmp_table_optimizer_costs, TMP_ENGINE_HTON->optimizer_costs, + sizeof(tmp_table_optimizer_costs)); +} + + +static void free_optimizer_costs(const char *name, void *cost) +{ + if ((OPTIMIZER_COSTS*) cost != &default_optimizer_costs) + my_free(cost); +} + +void free_all_optimizer_costs() +{ + linked_optimizer_costs.delete_elements(free_optimizer_costs); +} diff --git a/sql/keycaches.h b/sql/keycaches.h index 68c3dd3a2b0..721251b6745 100644 --- a/sql/keycaches.h +++ b/sql/keycaches.h @@ -35,7 +35,7 @@ class NAMED_ILIST: public I_List<NAMED_ILINK> }; /* For key cache */ -extern LEX_CSTRING default_key_cache_base; +extern LEX_CSTRING default_base; extern KEY_CACHE zero_key_cache; extern NAMED_ILIST key_caches; diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 234c4cdfd7a..d4103f669fa 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -20,6 +20,7 @@ #include "key.h" #include "sql_statistics.h" #include "rowid_filter.h" +#include "optimizer_defaults.h" /**************************************************************************** * Default MRR implementation (MRR to non-MRR converter) @@ -302,46 +303,37 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, if (total_rows != HA_POS_ERROR) { - double io_cost= avg_io_cost(); - double range_lookup_cost= (io_cost * KEY_LOOKUP_COST * - optimizer_cache_cost); + double key_cost; set_if_smaller(total_rows, max_rows); /* The following calculation is the same as in multi_range_read_info(): */ *flags |= HA_MRR_USE_DEFAULT_IMPL; cost->reset(); - cost->avg_io_cost= cost->idx_avg_io_cost= io_cost; + cost->avg_io_cost= cost->idx_avg_io_cost= 0; // Not used! 
if (!is_clustering_key(keyno)) { - cost->idx_io_count= (double) io_blocks; + key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); + cost->idx_cpu_cost= key_cost; + if (!(*flags & HA_MRR_INDEX_ONLY)) { - cost->idx_cpu_cost= (ha_keyread_time(keyno, 1, total_rows) + - (n_ranges-1) * range_lookup_cost); - cost->cpu_cost= ha_read_time(keyno, 0, total_rows); - cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; + /* ha_rnd_pos_time includes ROW_COPY_COST */ + cost->cpu_cost= ha_rnd_pos_time(total_rows); } else { /* Index only read */ - cost->idx_cpu_cost= (ha_keyread_time(keyno, 1, total_rows) + - (n_ranges-1) * range_lookup_cost); - cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST; + cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST; } } else { - /* - Clustered index - If all index dives are to a few blocks, then limit the - ranges used by read_time to the number of dives. - */ + /* Clustered index */ io_blocks+= unassigned_single_point_ranges; - uint limited_ranges= (uint) MY_MIN((ulonglong) n_ranges, io_blocks); - cost->idx_cpu_cost= limited_ranges * range_lookup_cost; - cost->cpu_cost= ha_read_time(keyno, 0, total_rows); - cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; + key_cost= ha_keyread_time(keyno, n_ranges, total_rows, io_blocks); + cost->idx_cpu_cost= key_cost; + cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; } cost->comp_cost= (rows2double(total_rows) * WHERE_COST + MULTI_RANGE_READ_SETUP_COST); @@ -378,7 +370,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, @param keyno Index number @param n_ranges Estimated number of ranges (i.e. intervals) in the range sequence. 
- @param n_rows Estimated total number of records contained within all + @param total_rows Estimated total number of records contained within all of the ranges @param bufsz INOUT IN: Size of the buffer available for use OUT: Size of the buffer that will be actually used, or @@ -393,7 +385,7 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, other Error or can't perform the requested scan */ -ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, +ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint total_rows, uint key_parts, uint *bufsz, uint *flags, Cost_estimate *cost) { @@ -410,38 +402,27 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, /* Produce the same cost as non-MRR code does */ if (!is_clustering_key(keyno)) { - double range_lookup_cost= (avg_io_cost() * KEY_LOOKUP_COST * - optimizer_cache_cost); - /* - idx_io_count could potentially be increased with the number of - index leaf blocks we have to read for finding n_rows. - */ - cost->idx_io_count= n_ranges; + double key_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0); + cost->idx_cpu_cost= key_cost; + if (!(*flags & HA_MRR_INDEX_ONLY)) { - cost->idx_cpu_cost= (keyread_time(keyno, 1, n_rows) + - (n_ranges-1) * range_lookup_cost); - cost->cpu_cost= read_time(keyno, 0, n_rows); - cost->copy_cost= rows2double(n_rows) * ROW_COPY_COST; + /* ha_rnd_pos_time includes ROW_COPY_COST */ + cost->cpu_cost= ha_rnd_pos_time(total_rows); } else { - /* - Same as above, but take into account copying the key to the upper - level. 
- */ - cost->idx_cpu_cost= (keyread_time(keyno, 1, n_rows) + - (n_ranges-1) * range_lookup_cost); - cost->copy_cost= rows2double(n_rows) * KEY_COPY_COST; + /* Index only read */ + cost->copy_cost= rows2double(total_rows) * KEY_COPY_COST; } } else { /* Clustering key */ - cost->cpu_cost= read_time(keyno, n_ranges, n_rows); - cost->copy_cost= rows2double(n_rows) * ROW_COPY_COST; + cost->cpu_cost= ha_keyread_time(keyno, n_ranges, total_rows, 0); + cost->copy_cost= rows2double(total_rows) * ROW_COPY_COST; } - cost->comp_cost= rows2double(n_rows) * WHERE_COST; + cost->comp_cost= rows2double(total_rows) * WHERE_COST; return 0; } @@ -2043,7 +2024,7 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, cost->mem_cost= (double)rows_in_last_step * elem_size; /* Total cost of all index accesses */ - index_read_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows); + index_read_cost= primary_file->ha_keyread_and_copy_time(keynr, 1, rows, 0); cost->add_io(index_read_cost, 1 /* Random seeks */); return FALSE; } @@ -2081,42 +2062,6 @@ void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost) /** Get cost of reading nrows table records in a "disk sweep" - A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made - for an ordered sequence of rowids. - - We assume hard disk IO. The read is performed as follows: - - 1. The disk head is moved to the needed cylinder - 2. The controller waits for the plate to rotate - 3. The data is transferred - - Time to do #3 is insignificant compared to #2+#1. - - Time to move the disk head is proportional to head travel distance. - - Time to wait for the plate to rotate depends on whether the disk head - was moved or not. - - If disk head wasn't moved, the wait time is proportional to distance - between the previous block and the block we're reading. - - If the head was moved, we don't know how much we'll need to wait for the - plate to rotate. 
We assume the wait time to be a variate with a mean of - 0.5 of full rotation time. - - Our cost units are "random disk seeks". The cost of random disk seek is - actually not a constant, it depends one range of cylinders we're going - to access. We make it constant by introducing a fuzzy concept of "typical - datafile length" (it's fuzzy as it's hard to tell whether it should - include index file, temp.tables etc). Then random seek cost is: - - 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length - - We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9. - - If handler::avg_io_cost() < 1.0, then we will trust the handler - when it comes to the average cost (this is for example true for HEAP). - @param table Table to be accessed @param nrows Number of rows to retrieve @param interrupted TRUE <=> Assume that the disk sweep will be @@ -2131,8 +2076,7 @@ void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, cost->reset(); #ifndef OLD_SWEEP_COST - cost->cpu_cost= table->file->ha_rnd_pos_time(nrows); - cost->avg_io_cost= table->file->avg_io_cost(); + cost->cpu_cost= table->file->ha_rnd_pos_call_time(nrows); #else if (table->file->pk_is_clustering_key(table->s->primary_key)) { diff --git a/sql/mysqld.cc b/sql/mysqld.cc index c7c21dcf31c..204da6408ed 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -52,6 +52,7 @@ #include "sql_expression_cache.h" // subquery_cache_miss, subquery_cache_hit #include "sys_vars_shared.h" #include "ddl_log.h" +#include "optimizer_defaults.h" #include <m_ctype.h> #include <my_dir.h> @@ -732,7 +733,7 @@ mysql_mutex_t LOCK_prepared_stmt_count; #ifdef HAVE_OPENSSL mysql_mutex_t LOCK_des_key_file; #endif -mysql_mutex_t LOCK_backup_log; +mysql_mutex_t LOCK_backup_log, LOCK_optimizer_costs; mysql_rwlock_t LOCK_grant, LOCK_sys_init_connect, LOCK_sys_init_slave; mysql_rwlock_t LOCK_ssl_refresh; mysql_rwlock_t LOCK_all_status_vars; @@ -902,7 +903,7 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, 
key_LOCK_crypt, key_LOCK_delayed_create, key_LOCK_delayed_insert, key_LOCK_delayed_status, key_LOCK_error_log, key_LOCK_gdl, key_LOCK_global_system_variables, - key_LOCK_manager, key_LOCK_backup_log, + key_LOCK_manager, key_LOCK_backup_log, key_LOCK_optimizer_costs, key_LOCK_prepared_stmt_count, key_LOCK_rpl_status, key_LOCK_server_started, key_LOCK_status, key_LOCK_temp_pool, @@ -965,6 +966,7 @@ static PSI_mutex_info all_server_mutexes[]= { &key_hash_filo_lock, "hash_filo::lock", 0}, { &key_LOCK_active_mi, "LOCK_active_mi", PSI_FLAG_GLOBAL}, { &key_LOCK_backup_log, "LOCK_backup_log", PSI_FLAG_GLOBAL}, + { &key_LOCK_optimizer_costs, "LOCK_optimizer_costs", PSI_FLAG_GLOBAL}, { &key_LOCK_temp_pool, "LOCK_temp_pool", PSI_FLAG_GLOBAL}, { &key_LOCK_thread_id, "LOCK_thread_id", PSI_FLAG_GLOBAL}, { &key_LOCK_crypt, "LOCK_crypt", PSI_FLAG_GLOBAL}, @@ -2005,6 +2007,7 @@ static void clean_up(bool print_message) mdl_destroy(); dflt_key_cache= 0; key_caches.delete_elements(free_key_cache); + free_all_optimizer_costs(); wt_end(); multi_keycache_free(); sp_cache_end(); @@ -2127,6 +2130,7 @@ static void clean_up_mutexes() mysql_mutex_destroy(&LOCK_active_mi); mysql_rwlock_destroy(&LOCK_ssl_refresh); mysql_mutex_destroy(&LOCK_backup_log); + mysql_mutex_destroy(&LOCK_optimizer_costs); mysql_mutex_destroy(&LOCK_temp_pool); mysql_rwlock_destroy(&LOCK_sys_init_connect); mysql_rwlock_destroy(&LOCK_sys_init_slave); @@ -4516,6 +4520,8 @@ static int init_thread_environment() mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered, MY_MUTEX_INIT_SLOW); mysql_mutex_init(key_LOCK_backup_log, &LOCK_backup_log, MY_MUTEX_INIT_FAST); + mysql_mutex_init(key_LOCK_optimizer_costs, &LOCK_optimizer_costs, + MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_temp_pool, &LOCK_temp_pool, MY_MUTEX_INIT_FAST); #ifdef HAVE_OPENSSL @@ -5435,6 +5441,7 @@ static int init_server_components() unireg_abort(1); } #endif + copy_tmptable_optimizer_costs(); #ifdef WITH_WSREP /* @@ -7826,12 +7833,17 @@ static int 
mysql_init_variables(void) strnmov(server_version, MYSQL_SERVER_VERSION, sizeof(server_version)-1); thread_cache.init(); key_caches.empty(); - if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str, - default_key_cache_base.length))) + if (!(dflt_key_cache= get_or_create_key_cache(default_base.str, + default_base.length))) { sql_print_error("Cannot allocate the keycache"); return 1; } + if (create_default_optimizer_costs()) + { + sql_print_error("Cannot allocate optimizer_costs"); + return 1; + } /* set key_cache_hash.default_value = dflt_key_cache */ multi_keycache_init(); @@ -8412,11 +8424,14 @@ mysqld_get_one_option(const struct my_option *opt, const char *argument, } -/** Handle arguments for multiple key caches. */ +/** + Handle arguments for multiple key caches, replication_options and + optimizer_costs + */ C_MODE_START -static void* +static void * mysql_getopt_value(const char *name, uint length, const struct my_option *option, int *error) { @@ -8454,6 +8469,7 @@ mysql_getopt_value(const char *name, uint length, } /* We return in all cases above. 
Let us silence -Wimplicit-fallthrough */ DBUG_ASSERT(0); + break; #ifdef HAVE_REPLICATION /* fall through */ case OPT_REPLICATE_DO_DB: @@ -8481,11 +8497,87 @@ mysql_getopt_value(const char *name, uint length, } return 0; } -#endif + break; +#endif + case OPT_COSTS_DISK_READ_COST: + case OPT_COSTS_INDEX_BLOCK_COPY_COST: + case OPT_COSTS_KEY_CMP_COST: + case OPT_COSTS_KEY_COPY_COST: + case OPT_COSTS_KEY_LOOKUP_COST: + case OPT_COSTS_KEY_NEXT_FIND_COST: + case OPT_COSTS_DISK_READ_RATIO: + case OPT_COSTS_ROW_COPY_COST: + case OPT_COSTS_ROW_LOOKUP_COST: + case OPT_COSTS_ROW_NEXT_FIND_COST: + case OPT_COSTS_ROWID_CMP_COST: + case OPT_COSTS_ROWID_COPY_COST: + { + OPTIMIZER_COSTS *costs; + if (unlikely(!(costs= get_or_create_optimizer_costs(name, length)))) + { + if (error) + *error= EXIT_OUT_OF_MEMORY; + return 0; + } + switch (option->id) { + case OPT_COSTS_DISK_READ_COST: + return &costs->disk_read_cost; + case OPT_COSTS_INDEX_BLOCK_COPY_COST: + return &costs->index_block_copy_cost; + case OPT_COSTS_KEY_CMP_COST: + return &costs->key_cmp_cost; + case OPT_COSTS_KEY_COPY_COST: + return &costs->key_copy_cost; + case OPT_COSTS_KEY_LOOKUP_COST: + return &costs->key_lookup_cost; + case OPT_COSTS_KEY_NEXT_FIND_COST: + return &costs->key_next_find_cost; + case OPT_COSTS_DISK_READ_RATIO: + return &costs->disk_read_ratio; + case OPT_COSTS_ROW_COPY_COST: + return &costs->row_copy_cost; + case OPT_COSTS_ROW_LOOKUP_COST: + return &costs->row_lookup_cost; + case OPT_COSTS_ROW_NEXT_FIND_COST: + return &costs->row_next_find_cost; + case OPT_COSTS_ROWID_CMP_COST: + return &costs->rowid_cmp_cost; + case OPT_COSTS_ROWID_COPY_COST: + return &costs->rowid_copy_cost; + default: + DBUG_ASSERT(0); + } + } } return option->value; } + +static void +mariadb_getopt_adjust_value(const struct my_option *option, void *value) +{ + switch (option->id) { + case OPT_COSTS_DISK_READ_COST: + case OPT_COSTS_INDEX_BLOCK_COPY_COST: + case OPT_COSTS_KEY_CMP_COST: + case OPT_COSTS_KEY_COPY_COST: + case 
OPT_COSTS_KEY_LOOKUP_COST: + case OPT_COSTS_KEY_NEXT_FIND_COST: + case OPT_COSTS_DISK_READ_RATIO: + case OPT_COSTS_ROW_COPY_COST: + case OPT_COSTS_ROW_LOOKUP_COST: + case OPT_COSTS_ROW_NEXT_FIND_COST: + case OPT_COSTS_ROWID_CMP_COST: + case OPT_COSTS_ROWID_COPY_COST: + /* Value from command line is given in usec. Convert to ms */ + *(double*) value= *(double*) value/1000.0; + break; + default: + break; + } +} + + static void option_error_reporter(enum loglevel level, const char *format, ...) { va_list args; @@ -8524,6 +8616,7 @@ static int get_options(int *argc_ptr, char ***argv_ptr) my_getopt_get_addr= mysql_getopt_value; my_getopt_error_reporter= option_error_reporter; + my_getopt_adjust_value= mariadb_getopt_adjust_value; /* prepare all_options array */ my_init_dynamic_array(PSI_INSTRUMENT_ME, &all_options, sizeof(my_option), diff --git a/sql/mysqld.h b/sql/mysqld.h index 90306ccb290..22984babf97 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -330,7 +330,7 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_xid_list, key_LOCK_logger, key_LOCK_manager, key_LOCK_prepared_stmt_count, key_LOCK_rpl_status, key_LOCK_server_started, - key_LOCK_status, + key_LOCK_status, key_LOCK_optimizer_costs, key_LOCK_thd_data, key_LOCK_thd_kill, key_LOCK_user_conn, key_LOG_LOCK_log, key_master_info_data_lock, key_master_info_run_lock, @@ -760,7 +760,8 @@ extern mysql_mutex_t LOCK_error_log, LOCK_delayed_insert, LOCK_short_uuid_generator, LOCK_delayed_status, LOCK_delayed_create, LOCK_crypt, LOCK_timezone, LOCK_active_mi, LOCK_manager, LOCK_user_conn, - LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_backup_log; + LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_backup_log, + LOCK_optimizer_costs; extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_global_system_variables; extern mysql_rwlock_t LOCK_all_status_vars; extern mysql_mutex_t LOCK_start_thread; @@ -795,6 +796,18 @@ enum options_mysqld OPT_BINLOG_IGNORE_DB, OPT_BIN_LOG, OPT_BOOTSTRAP, +
OPT_COSTS_DISK_READ_COST, + OPT_COSTS_INDEX_BLOCK_COPY_COST, + OPT_COSTS_KEY_CMP_COST, + OPT_COSTS_KEY_COPY_COST, + OPT_COSTS_KEY_LOOKUP_COST, + OPT_COSTS_KEY_NEXT_FIND_COST, + OPT_COSTS_DISK_READ_RATIO, + OPT_COSTS_ROW_COPY_COST, + OPT_COSTS_ROW_LOOKUP_COST, + OPT_COSTS_ROW_NEXT_FIND_COST, + OPT_COSTS_ROWID_CMP_COST, + OPT_COSTS_ROWID_COPY_COST, OPT_EXPIRE_LOGS_DAYS, OPT_BINLOG_EXPIRE_LOGS_SECONDS, OPT_CONSOLE, diff --git a/sql/opt_range.cc b/sql/opt_range.cc index fcad1c7159e..7f943a45fbc 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2740,7 +2740,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, table_info.add_table_name(head); Json_writer_object trace_range(thd, "range_analysis"); - if (unlikely(thd->trace_started())) + if (unlikely(thd->trace_started()) && read_time != DBL_MAX) { Json_writer_object table_rec(thd, "table_scan"); table_rec.add("rows", records).add("cost", read_time); @@ -2867,10 +2867,11 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, thd->mem_root= &alloc; /* Calculate cost of full index read for the shortest covering index */ - if (!force_quick_range && !head->covering_keys.is_clear_all()) + if (!force_quick_range && !head->covering_keys.is_clear_all() && + !head->no_keyread) { - int key_for_use= find_shortest_key(head, &head->covering_keys); double key_read_time; + uint key_for_use= find_shortest_key(head, &head->covering_keys); key_read_time= head->file->ha_key_scan_and_compare_time(key_for_use, records); DBUG_PRINT("info", ("'all'+'using index' scan will be using key %d, " @@ -3057,7 +3058,8 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, param.table->set_opt_range_condition_rows(group_trp->records); DBUG_PRINT("info", ("table_rows: %llu opt_range_condition_rows: %llu " "group_trp->records: %ull", - table_records, param.table->opt_range_condition_rows, + table_records, + param.table->opt_range_condition_rows, group_trp->records)); Json_writer_object grp_summary(thd, 
"best_group_range_summary"); @@ -5079,7 +5081,7 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records, { DBUG_ENTER("get_sweep_read_cost"); #ifndef OLD_SWEEP_COST - double cost= (param->table->file->ha_rnd_pos_time(records) + + double cost= (param->table->file->ha_rnd_pos_call_time(records) + (add_time_for_compare ? records * param->thd->variables.optimizer_where_cost : 0)); DBUG_PRINT("return", ("cost: %g", cost)); @@ -5095,7 +5097,7 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records, We are using the primary key to find the rows. Calculate the cost for this. */ - result= table->file->ha_rnd_pos_time(records); + result= table->file->ha_rnd_pos_call_time(records); } else { @@ -5133,7 +5135,7 @@ static double get_sweep_read_cost(const PARAM *param, ha_rows records, */ result= busy_blocks; } - result+= rows2double(n_rows) * ROW_COPY_COST_THD(param->table->thd); + result+= rows2double(n_rows) * param->table->file->ROW_COPY_COST; DBUG_PRINT("return",("cost: %g", result)); DBUG_RETURN(result); @@ -5347,7 +5349,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, is done in QUICK_RANGE_SELECT::row_in_ranges) */ double rid_comp_cost= (rows2double(non_cpk_scan_records) * - ROWID_COMPARE_COST_THD(param->thd)); + default_optimizer_costs.rowid_cmp_cost); imerge_cost+= rid_comp_cost; trace_best_disjunct.add("cost_of_mapping_rowid_in_non_clustered_pk_scan", rid_comp_cost); @@ -5359,7 +5361,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, double sweep_cost= get_sweep_read_cost(param, non_cpk_scan_records, 0); imerge_cost+= sweep_cost; trace_best_disjunct. - add("records", non_cpk_scan_records). + add("rows", non_cpk_scan_records). add("cost_sort_rowid_and_read_disk", sweep_cost).
add("cost", imerge_cost); } @@ -5389,7 +5391,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, } { - const double dup_removal_cost= Unique::get_use_cost( + const double dup_removal_cost= Unique::get_use_cost(thd, param->imerge_cost_buff, (uint)non_cpk_scan_records, param->table->file->ref_length, (size_t)param->thd->variables.sortbuff_size, @@ -5463,10 +5465,9 @@ skip_to_ror_scan: double cost; if ((*cur_child)->is_ror) { - /* Ok, we have index_only cost, now get full rows scan cost */ + /* Ok, we have index_only cost, now get full rows lookup cost */ cost= param->table->file-> - ha_read_and_compare_time(param->real_keynr[(*cur_child)->key_idx], 1, - (*cur_child)->records); + ha_rnd_pos_call_and_compare_time((*cur_child)->records); } else cost= read_time; @@ -5935,7 +5936,7 @@ bool prepare_search_best_index_intersect(PARAM *param, continue; } - cost= table->opt_range[(*index_scan)->keynr].index_only_fetch_cost(thd); + cost= table->opt_range[(*index_scan)->keynr].index_only_fetch_cost(table); idx_scan.add("cost", cost); @@ -6041,7 +6042,7 @@ bool prepare_search_best_index_intersect(PARAM *param, selected_idx.add("index", key_info->name); print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts); selected_idx. - add("records", (*scan_ptr)->records). + add("rows", (*scan_ptr)->records). add("filtered_records", (*scan_ptr)->filtered_out); } } @@ -6058,7 +6059,7 @@ bool prepare_search_best_index_intersect(PARAM *param, selected_idx.add("index", key_info->name); print_keyparts(thd, key_info, (*scan_ptr)->used_key_parts); selected_idx. - add("records", (*scan_ptr)->records). + add("rows", (*scan_ptr)->records).
add("filtered_records", (*scan_ptr)->filtered_out); } } @@ -6324,7 +6325,8 @@ double get_cpk_filter_cost(ha_rows filtered_records, */ static -bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, +bool check_index_intersect_extension(THD *thd, + PARTIAL_INDEX_INTERSECT_INFO *curr, INDEX_SCAN_INFO *ext_index_scan, PARTIAL_INDEX_INTERSECT_INFO *next) { @@ -6371,7 +6373,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, size_t max_memory_size= common_info->max_memory_size; records_sent_to_unique+= ext_index_scan_records; - cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, + cost= Unique::get_use_cost(thd, buff_elems, (size_t) records_sent_to_unique, key_size, max_memory_size, compare_factor, TRUE, &next->in_memory); @@ -6382,7 +6384,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, double cost2; bool in_memory2; ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk; - cost2= Unique::get_use_cost(buff_elems, (size_t) records2, key_size, + cost2= Unique::get_use_cost(thd, buff_elems, (size_t) records2, key_size, max_memory_size, compare_factor, TRUE, &in_memory2); cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan, @@ -6442,7 +6444,8 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, */ static -void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr) +void find_index_intersect_best_extension(THD *thd, + PARTIAL_INDEX_INTERSECT_INFO *curr) { PARTIAL_INDEX_INTERSECT_INFO next; COMMON_INDEX_INTERSECT_INFO *common_info= curr->common_info; @@ -6475,8 +6478,9 @@ void find_index_intersect_best_extension(PARTIAL_INDEX_INTERSECT_INFO *curr) { *rem_first_index_scan_ptr= *index_scan_ptr; *index_scan_ptr= rem_first_index_scan; - if (check_index_intersect_extension(curr, *rem_first_index_scan_ptr, &next)) - find_index_intersect_best_extension(&next); + if (check_index_intersect_extension(thd, curr, 
*rem_first_index_scan_ptr, + &next)) + find_index_intersect_best_extension(thd, &next); *index_scan_ptr= *rem_first_index_scan_ptr; *rem_first_index_scan_ptr= rem_first_index_scan; } @@ -6528,7 +6532,7 @@ TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree, read_time)) DBUG_RETURN(NULL); - find_index_intersect_best_extension(&init); + find_index_intersect_best_extension(thd, &init); if (common.best_length <= 1 && !common.best_uses_cpk) DBUG_RETURN(NULL); @@ -6697,7 +6701,7 @@ ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg) */ ror_scan->index_read_cost= param->table->file->ha_keyread_and_copy_time(ror_scan->keynr, 1, - ror_scan->records); + ror_scan->records, 0); DBUG_RETURN(ror_scan); } @@ -13885,10 +13889,10 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) cause= "not single_table"; else if (join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */ cause= "rollup"; - else if (table->s->keys == 0) /* There are no indexes to use. */ + else if (table->s->keys == 0) // There are no indexes to use. cause= "no index"; else if (join->conds && join->conds->used_tables() - & OUTER_REF_TABLE_BIT) /* Cannot execute with correlated conditions. */ + & OUTER_REF_TABLE_BIT) // Cannot execute with correlated conditions. cause= "correlated conditions"; if (cause) @@ -14093,7 +14097,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) does not qualify as covering in our case. If this is the case, below we check that all query fields are indeed covered by 'cur_index'. */ - if (cur_index_info->user_defined_key_parts == table->actual_n_key_parts(cur_index_info) + if (cur_index_info->user_defined_key_parts == + table->actual_n_key_parts(cur_index_info) && pk < MAX_KEY && cur_index != pk && (table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX)) { @@ -14136,7 +14141,8 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) first Item? If so, then why? 
What is the array for? */ /* Above we already checked that all group items are fields. */ - DBUG_ASSERT((*tmp_group->item)->real_item()->type() == Item::FIELD_ITEM); + DBUG_ASSERT((*tmp_group->item)->real_item()->type() == + Item::FIELD_ITEM); Item_field *group_field= (Item_field *) (*tmp_group->item)->real_item(); if (group_field->field->eq(cur_part->field)) { @@ -15000,24 +15006,28 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, bool have_min, bool have_max, double *read_cost, ha_rows *records) { + uint keys_per_block, key_length; ha_rows table_records; ha_rows num_groups; ha_rows num_blocks; - uint keys_per_block; ha_rows keys_per_group; ha_rows keys_per_subgroup; /* Average number of keys in sub-groups */ /* formed by a key infix. */ double p_overlap; /* Probability that a sub-group overlaps two blocks. */ double quick_prefix_selectivity; double io_cost; + handler *file= table->file; DBUG_ENTER("cost_group_min_max"); + /* Same code as in handler::key_read_time() */ table_records= table->stat_records(); - /* Assume block is 75 % full */ - keys_per_block= (uint) (table->file->stats.block_size * 3 / 4 / - (index_info->key_length + table->file->ref_length) - + 1); - num_blocks= (ha_rows)(table_records / keys_per_block) + 1; + key_length= (index_info->key_length + file->ref_length); + num_blocks= (table_records * key_length / INDEX_BLOCK_FILL_FACTOR_DIV * + INDEX_BLOCK_FILL_FACTOR_MUL) / file->stats.block_size + 1; + keys_per_block= (file->stats.block_size / + (key_length * INDEX_BLOCK_FILL_FACTOR_MUL / + INDEX_BLOCK_FILL_FACTOR_DIV) + + 1); /* Compute the number of keys in a group. 
*/ if (!group_key_parts) @@ -15035,7 +15045,10 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, keys_per_group= (table_records / 10) + 1; } } - num_groups= (table_records / keys_per_group) + 1; + if (keys_per_group > 1) + num_groups= (table_records / keys_per_group) + 1; + else + num_groups= table_records; /* Apply the selectivity of the quick select for group prefixes. */ if (range_tree && (quick_prefix_records != HA_POS_ERROR)) @@ -15059,8 +15072,7 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, /* There cannot be more groups than matched records */ set_if_smaller(num_groups, quick_prefix_records); } - /* Ensure we don't have more groups than rows in table */ - set_if_smaller(num_groups, table_records); + DBUG_ASSERT(num_groups <= table_records); if (used_key_parts > group_key_parts) { @@ -15081,39 +15093,22 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts, io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks); } else - io_cost= (keys_per_group > keys_per_block) ? - (have_min && have_max) ? (double) (num_groups + 1) : - (double) num_groups : - (double) num_blocks; + io_cost= ((keys_per_group > keys_per_block) ? + (have_min && have_max) ? (double) (num_groups + 1) : + (double) num_groups : + (double) num_blocks); /* CPU cost must be comparable to that of an index scan as computed in SQL_SELECT::test_quick_select(). When the groups are small, e.g. for a unique index, using index scan will be cheaper since it reads the next record without having to re-position to it on every - group. To make the CPU cost reflect this, we estimate the CPU cost - as the sum of: - 1. Cost for evaluating the condition for each num_group - KEY_COMPARE_COST (similarly as for index scan). - 2. Cost for navigating the index structure (assuming a b-tree). - Note: We only add the cost for one index comparision per block. For a - b-tree the number of comparisons will be larger. 
However the cost - is low as all of the upper level b-tree blocks should be in - memory. - TODO: This cost should be provided by the storage engine. - 3. Cost for comparing the row with the where clause + group. */ - const THD *thd= table->in_use; - const double tree_traversal_cost= - ceil(log(static_cast<double>(table_records))/ - log(static_cast<double>(keys_per_block))) * - thd->variables.optimizer_key_cmp_cost; - - const double cpu_cost= (num_groups * - (tree_traversal_cost + - thd->variables.optimizer_where_cost)); - - *read_cost= io_cost + cpu_cost; + uint keyno= (uint) (index_info - table->key_info); + *read_cost= file->ha_keyread_and_compare_time(keyno, (ulong) num_groups, + num_groups, + io_cost); *records= num_groups; DBUG_PRINT("info", diff --git a/sql/opt_split.cc b/sql/opt_split.cc index 8cb82693d96..8848c1820df 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -188,6 +188,7 @@ #include "mariadb.h" #include "sql_select.h" #include "opt_trace.h" +#include "optimizer_defaults.h" /* Info on a splitting field */ struct SplM_field_info @@ -665,6 +666,8 @@ add_ext_keyuses_for_splitting_field(Dynamic_array<KEYUSE_EXT> *ext_keyuses, /* @brief Cost of the post join operation used in specification of splittable table + This does not include the cost of creating the temporary table as this + operation can be executed many times for the same temporary table. */ static @@ -673,13 +676,18 @@ double spl_postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len) double cost; TMPTABLE_COSTS tmp_cost= get_tmp_table_costs(thd, join_record_count, rec_len, 0, 1); - // cost to fill tmp table - cost= tmp_cost.create + tmp_cost.write * join_record_count; - // cost to perform post join operation used here + /* cost to fill tmp table */ + cost= tmp_cost.write * join_record_count; + /* cost to perform post join operation used here */ cost+= tmp_cost.lookup * join_record_count; - cost+= (join_record_count == 0 ? 
0 : - join_record_count * log2 (join_record_count)) * - SORT_INDEX_CMP_COST; // cost to perform sorting + /* cost to perform sorting */ + /* QQQ + We should use cost_of_filesort() for computing sort. + Do we always perform sorting ? If not, this should be done conditionally + */ + cost+= ((join_record_count == 0 ? 0 : + join_record_count * log2 (join_record_count)) * + SORT_INDEX_CMP_COST); return cost; } @@ -873,7 +881,7 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start, splitting the function set it as the true plan of materialization of the table T. The function caches the found plans for materialization of table T - together if the info what key was used for splitting. Next time when + together with the info what key was used for splitting. Next time when the optimizer prefers to use the same key the plan is taken from the cache of plans @@ -1004,12 +1012,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, spl_opt_info->unsplit_card : 1); uint rec_len= table->s->rec_buff_length; - double split_card= spl_opt_info->unsplit_card * spl_plan->split_sel; - double oper_cost= split_card * - spl_postjoin_oper_cost(thd, split_card, rec_len); - spl_plan->cost= join->best_positions[join->table_count-1].read_time + - + oper_cost; + double oper_cost= (split_card * + spl_postjoin_oper_cost(thd, split_card, rec_len)); + spl_plan->cost= (join->best_positions[join->table_count-1].read_time + + oper_cost); if (unlikely(thd->trace_started())) { @@ -1030,7 +1037,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, } if (spl_plan) { - if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01) + if (record_count * spl_plan->cost + COST_EPS < spl_opt_info->unsplit_cost) { /* The best plan that employs splitting is cheaper than @@ -1054,7 +1061,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, trace. add("startup_cost", startup_cost). add("splitting_cost", spl_plan->cost). 
- add("records", records); + add("rows", records); } } else diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index a944e24ac7a..e996e1738b9 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -35,6 +35,7 @@ #include "sql_test.h" #include <my_bit.h> #include "opt_trace.h" +#include "optimizer_defaults.h" /* This file contains optimizations for semi-join subqueries. @@ -1456,8 +1457,8 @@ void get_delayed_table_estimates(TABLE *table, hash_sj_engine->tmp_table->s->reclength); /* Do like in handler::ha_scan_and_compare_time, but ignore the where cost */ - *scan_time= ((data_size/table->file->stats.block_size+2) * - table->file->avg_io_cost()) + *out_rows * file->ROW_COPY_COST; + *scan_time= ((data_size/IO_SIZE * table->file->avg_io_cost()) + + *out_rows * file->ROW_COPY_COST); } @@ -2580,11 +2581,9 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) We don't need to check the where clause for each row, so no WHERE_COST is needed. */ - scan_cost= (TABLE_SCAN_SETUP_COST + - (cost.block_size == 0 ? 0 : - ((rowlen * (double) sjm->rows) / cost.block_size + - TABLE_SCAN_SETUP_COST))); + scan_cost= (rowlen * (double) sjm->rows) / cost.block_size; total_cost= (scan_cost * cost.cache_hit_ratio * cost.avg_io_cost + + TABLE_SCAN_SETUP_COST_THD(thd) + row_copy_cost * sjm->rows); sjm->scan_cost.convert_from_cost(total_cost); @@ -2684,8 +2683,6 @@ get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used, bool add_copy_cost) { TMPTABLE_COSTS cost; - double row_copy_cost= add_copy_cost ? ROW_COPY_COST_THD(thd) : 0; - /* From heap_prepare_hp_create_info(), assuming one hash key used */ row_size+= sizeof(char*)*2; row_size= MY_ALIGN(MY_MAX(row_size, sizeof(char*)) + 1, sizeof(char*)); @@ -2693,24 +2690,31 @@ get_tmp_table_costs(THD *thd, double row_count, uint row_size, bool blobs_used, if (row_count > thd->variables.max_heap_table_size / (double) row_size || blobs_used) { + double row_copy_cost= (add_copy_cost ? 
+ tmp_table_optimizer_costs.row_copy_cost : + 0); /* Disk based table */ - cost.lookup= ((DISK_TEMPTABLE_LOOKUP_COST * - thd->optimizer_cache_hit_ratio)) + row_copy_cost; - cost.write= cost.lookup + row_copy_cost; + cost.lookup= ((tmp_table_optimizer_costs.key_lookup_cost * + tmp_table_optimizer_costs.disk_read_ratio) + + row_copy_cost); + cost.write= cost.lookup; cost.create= DISK_TEMPTABLE_CREATE_COST; cost.block_size= DISK_TEMPTABLE_BLOCK_SIZE; - cost.avg_io_cost= 1.0; - cost.cache_hit_ratio= thd->optimizer_cache_hit_ratio; + cost.avg_io_cost= tmp_table_optimizer_costs.disk_read_cost; + cost.cache_hit_ratio= tmp_table_optimizer_costs.disk_read_ratio; } else { /* Values are as they are in heap.h */ + double row_copy_cost= (add_copy_cost ? + heap_optimizer_costs.row_copy_cost : + 0); cost.lookup= HEAP_TEMPTABLE_LOOKUP_COST + row_copy_cost; - cost.write= cost.lookup + row_copy_cost; + cost.write= cost.lookup; cost.create= HEAP_TEMPTABLE_CREATE_COST; - cost.block_size= 0; - cost.avg_io_cost= HEAP_TEMPTABLE_LOOKUP_COST; - cost.cache_hit_ratio= 1.0; + cost.block_size= 1; + cost.avg_io_cost= 0; + cost.cache_hit_ratio= 0; } return cost; } @@ -3181,7 +3185,7 @@ bool Sj_materialization_picker::check_qep(JOIN *join, if (unlikely(trace.trace_started())) { trace. - add("records", *record_count). + add("rows", *record_count). add("cost", *read_time); } return TRUE; @@ -3235,7 +3239,7 @@ bool Sj_materialization_picker::check_qep(JOIN *join, best_access_path(join, join->positions[i].table, rem_tables, join->positions, i, disable_jbuf, prefix_rec_count, &curpos, &dummy); - prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_read); + prefix_rec_count= COST_MULT(prefix_rec_count, curpos.records_out); prefix_cost= COST_ADD(prefix_cost, curpos.read_time); //TODO: take into account join condition selectivity here } @@ -3262,7 +3266,7 @@ bool Sj_materialization_picker::check_qep(JOIN *join, if (unlikely(trace.trace_started())) { trace. - add("records", *record_count). 
+ add("rows", *record_count). add("cost", *read_time); } return TRUE; @@ -3363,7 +3367,7 @@ bool LooseScan_picker::check_qep(JOIN *join, if (unlikely(trace.trace_started())) { trace. - add("records", *record_count). + add("rows", *record_count). add("cost", *read_time); } return TRUE; @@ -3461,7 +3465,7 @@ bool Firstmatch_picker::check_qep(JOIN *join, - remove fanout added by the last table */ if (*record_count) - *record_count /= join->positions[idx].records_read; + *record_count /= join->positions[idx].records_out; } else { @@ -3482,7 +3486,7 @@ bool Firstmatch_picker::check_qep(JOIN *join, if (unlikely(trace.trace_started())) { trace. - add("records", *record_count). + add("rows", *record_count). add("cost", *read_time); } return TRUE; @@ -3609,21 +3613,22 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, */ uint first_tab= first_dupsweedout_table; double dups_cost; - double prefix_rec_count; + double first_weedout_table_rec_count; double sj_inner_fanout= 1.0; double sj_outer_fanout= 1.0; uint temptable_rec_size; if (first_tab == join->const_tables) { - prefix_rec_count= 1.0; + first_weedout_table_rec_count= 1.0; temptable_rec_size= 0; dups_cost= 0.0; } else { dups_cost= join->positions[first_tab - 1].prefix_cost; - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + first_weedout_table_rec_count= + join->positions[first_tab - 1].prefix_record_count; temptable_rec_size= 8; /* This is not true but we'll make it so */ } @@ -3659,17 +3664,14 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, sj_outer_fanout, temptable_rec_size, 0, 0); - double write_cost= - COST_ADD(one_cost.create, - COST_MULT(join->positions[first_tab].prefix_record_count, - sj_outer_fanout * one_cost.write)); - double full_lookup_cost= - COST_MULT(join->positions[first_tab].prefix_record_count, - COST_MULT(sj_outer_fanout, - sj_inner_fanout * one_cost.lookup)); - *read_time= COST_ADD(dups_cost, COST_ADD(write_cost, full_lookup_cost)); + double prefix_record_count= 
join->positions[first_tab].prefix_record_count; + double write_cost= (one_cost.create + + prefix_record_count * sj_outer_fanout * one_cost.write); + double full_lookup_cost= (prefix_record_count * sj_outer_fanout * + sj_inner_fanout * one_cost.lookup); + *read_time= dups_cost + write_cost + full_lookup_cost; - *record_count= prefix_rec_count * sj_outer_fanout; + *record_count= first_weedout_table_rec_count * sj_outer_fanout; *handled_fanout= dups_removed_fanout; *strategy= SJ_OPT_DUPS_WEEDOUT; if (unlikely(join->thd->trace_started())) @@ -3677,7 +3679,10 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, Json_writer_object trace(join->thd); trace. add("strategy", "DuplicateWeedout"). - add("records", *record_count). + add("prefix_row_count", prefix_record_count). + add("tmp_table_rows", sj_outer_fanout). + add("sj_inner_fanout", sj_inner_fanout). + add("rows", *record_count). add("dups_cost", dups_cost). add("write_cost", write_cost). add("full_lookup_cost", full_lookup_cost). @@ -3881,7 +3886,7 @@ static void recalculate_prefix_record_count(JOIN *join, uint start, uint end) prefix_count= 1.0; else prefix_count= COST_MULT(join->best_positions[j-1].prefix_record_count, - join->best_positions[j-1].records_read); + join->best_positions[j-1].records_out); join->best_positions[j].prefix_record_count= prefix_count; } @@ -4033,7 +4038,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions, i, FALSE, prefix_rec_count, join->best_positions + i, &dummy); - prefix_rec_count *= join->best_positions[i].records_read; + prefix_rec_count *= join->best_positions[i].records_out; rem_tables &= ~join->best_positions[i].table->table->map; } } @@ -4075,7 +4080,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) TRUE /* no jbuf */, record_count, join->best_positions + idx, &dummy); } - record_count *= join->best_positions[idx].records_read; + record_count *= join->best_positions[idx].records_out; rem_tables &= 
~join->best_positions[idx].table->table->map; } } @@ -4133,7 +4138,7 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) } } rem_tables &= ~join->best_positions[idx].table->table->map; - record_count *= join->best_positions[idx].records_read; + record_count *= join->best_positions[idx].records_out; } first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN; first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables); @@ -5350,7 +5355,8 @@ int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, Got a table that's not within any semi-join nest. This is a case like this: - SELECT * FROM ot1, nt1 WHERE ot1.col IN (SELECT expr FROM it1, it2) + SELECT * FROM ot1, nt1 WHERE + ot1.col IN (SELECT expr FROM it1, it2) with a join order of @@ -6762,7 +6768,7 @@ bool JOIN::choose_subquery_plan(table_map join_tables) Json_writer_object trace_wrapper(thd); Json_writer_object trace_subquery(thd, "subquery_plan"); trace_subquery. - add("records", inner_record_count_1). + add("rows", inner_record_count_1). add("materialization_cost", materialize_strategy_cost). add("in_exist_cost", in_exists_strategy_cost). add("choosen", strategy); diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 4ba90f6c60b..b0053d3db14 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -226,15 +226,17 @@ public: if (!(found_part & 1 ) && /* no usable ref access for 1st key part */ s->table->covering_keys.is_set(key)) { + double records, read_time; part1_conds_met= TRUE; DBUG_PRINT("info", ("Can use full index scan for LooseScan")); /* Calculate the cost of complete loose index scan. 
*/ - double records= rows2double(s->table->file->stats.records); + records= rows2double(s->table->file->stats.records); /* The cost is entire index scan cost (divided by 2) */ - double read_time= s->table->file->ha_keyread_and_copy_time(key, 1, - (ha_rows) records); + read_time= s->table->file->ha_keyread_and_copy_time(key, 1, + (ha_rows) records, + 0); /* Now find out how many different keys we will get (for now we diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc index 374fc41aba8..d7b3d83bb18 100644 --- a/sql/opt_trace.cc +++ b/sql/opt_trace.cc @@ -696,8 +696,8 @@ void print_best_access_for_table(THD *thd, POSITION *pos) Json_writer_object obj(thd, "chosen_access_method"); obj. add("type", pos->type == JT_ALL ? "scan" : join_type_str[pos->type]). - add("records_read", pos->records_read). - add("records_out", pos->records_out). + add("rows_read", pos->records_read). + add("rows_out", pos->records_out). add("cost", pos->read_time). add("uses_join_buffering", pos->use_join_buffer); if (pos->range_rowid_filter_info) diff --git a/sql/optimizer_costs.h b/sql/optimizer_costs.h index de933969131..698cdbfe41e 100644 --- a/sql/optimizer_costs.h +++ b/sql/optimizer_costs.h @@ -18,41 +18,79 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ -/* This file includes costs variables used by the optimizer */ - /* - The table/index cache hit ratio in %. 0 means that a searched for key or row - will never be in the cache while 100 means it always in the cache. - - According to folklore, one need at least 80 % hit rate in the cache for - MariaDB to run very well. We set CACHE_HIT_RATIO to a bit smaller - as there is still a cost involved in finding the row in the B tree, hash - or other seek structure. 
+ This file defines costs structures and cost functions used by the optimizer +*/ - Increasing CACHE_HIT_RATIO will make MariaDB prefer key lookups over - table scans as the impact of ROW_COPY_COST and INDEX_COPY cost will - have a larger impact when more rows are exmined.. - Note that avg_io_cost() is multipled with this constant! +/* + OPTIMIZER_COSTS stores cost variables for each engine. They are stored + in linked_optimizer_costs (pointed to by handlerton) and TABLE_SHARE. */ -#define DEFAULT_CACHE_HIT_RATIO 50 -/* Convert ratio to cost */ - -static inline double cache_hit_ratio(uint ratio) +#define OPTIMIZER_COST_UNDEF -1.0 +struct OPTIMIZER_COSTS { - return (((double) (100 - ratio)) / 100.0); -} + double disk_read_cost; + double index_block_copy_cost; + double key_cmp_cost; + double key_copy_cost; + double key_lookup_cost; + double key_next_find_cost; + double disk_read_ratio; + double row_copy_cost; + double row_lookup_cost; + double row_next_find_cost; + double rowid_cmp_cost; + double rowid_copy_cost; + double initialized; // Set if default or connected with handlerton +}; + +/* Default optimizer costs */ +extern OPTIMIZER_COSTS default_optimizer_costs; +/* + These are used to avoid taking mutex while creating tmp tables + These are created once after the server is started so they are + not dynamic. +*/ +extern OPTIMIZER_COSTS heap_optimizer_costs, tmp_table_optimizer_costs; /* - Base cost for finding keys and rows from the engine is 1.0 - All other costs should be proportional to these + Interface to the engine cost variables. See optimizer_defaults.h for + the default values. 
*/ -/* Cost for finding the first key in a key scan */ -#define KEY_LOOKUP_COST ((double) 1.0) -/* Cost of finding a key from a row_ID (not used for clustered keys) */ -#define ROW_LOOKUP_COST ((double) 1.0) +#define DISK_READ_RATIO costs->disk_read_ratio +#define KEY_LOOKUP_COST costs->key_lookup_cost +#define ROW_LOOKUP_COST costs->row_lookup_cost +#define INDEX_BLOCK_COPY_COST costs->index_block_copy_cost +#define KEY_COPY_COST costs->key_copy_cost +#define ROW_COPY_COST costs->row_copy_cost +#define ROW_COPY_COST_THD(THD) default_optimizer_costs.row_copy_cost +#define KEY_NEXT_FIND_COST costs->key_next_find_cost +#define ROW_NEXT_FIND_COST costs->row_next_find_cost +#define KEY_COMPARE_COST costs->key_cmp_cost +#define SORT_INDEX_CMP_COST default_optimizer_costs.key_cmp_cost +#define DISK_READ_COST costs->disk_read_cost +#define DISK_READ_COST_THD(thd) default_optimizer_costs.disk_read_cost + +/* Cost of comparing two rowids. This is set relative to KEY_COMPARE_COST */ +#define ROWID_COMPARE_COST costs->rowid_cmp_cost +#define ROWID_COMPARE_COST_THD(THD) default_optimizer_costs.rowid_cmp_cost + +/* Cost of comparing two rowids. This is set relative to KEY_COPY_COST */ +#define ROWID_COPY_COST costs->rowid_copy_cost + +/* Engine unrelated costs. 
Stored in THD so that the user can change them */ +#define WHERE_COST optimizer_where_cost +#define WHERE_COST_THD(THD) ((THD)->variables.optimizer_where_cost) +#define TABLE_SCAN_SETUP_COST optimizer_scan_setup_cost +#define TABLE_SCAN_SETUP_COST_THD(THD) (THD)->variables.optimizer_scan_setup_cost +#define INDEX_SCAN_SETUP_COST optimizer_scan_setup_cost/2 + +/* Default fill factors of an (b-tree) index block is assumed to be 0.75 */ +#define INDEX_BLOCK_FILL_FACTOR_DIV 3 +#define INDEX_BLOCK_FILL_FACTOR_MUL 4 /* These constants impact the cost of QSORT and priority queue sorting, @@ -68,94 +106,13 @@ static inline double cache_hit_ratio(uint ratio) */ #define QSORT_SORT_SLOWNESS_CORRECTION_FACTOR (0.1) #define PQ_SORT_SLOWNESS_CORRECTION_FACTOR (0.1) -/* - Cost of finding and copying keys from the storage engine index cache to - an internal cache as part of an index scan. - Used in handler::keyread_time() -*/ -#define DEFAULT_INDEX_BLOCK_COPY_COST ((double) 1 / 5.0) -#define INDEX_BLOCK_COPY_COST(THD) ((THD)->variables.optimizer_index_block_copy_cost) - -/* - Cost of finding the next row during table scan and copying it to - 'table->record'. - If this is too small, then table scans will be prefered over 'ref' - as with table scans there are no key read (KEY_LOOKUP_COST), fewer - disk reads but more record copying and row comparisions. If it's - too big then MariaDB will used key lookup even when table scan is - better. -*/ -#define DEFAULT_ROW_COPY_COST ((double) 1.0 / 20.0) -#define ROW_COPY_COST optimizer_row_copy_cost -#define ROW_COPY_COST_THD(THD) ((THD)->variables.optimizer_row_copy_cost) /* Creating a record from the join cache is faster than getting a row from the engine. JOIN_CACHE_ROW_COPY_COST_FACTOR is the factor used to take this into account. This is multiplied with ROW_COPY_COST. 
*/ -#define JOIN_CACHE_ROW_COPY_COST_FACTOR 0.75 - -/* - Cost of finding the next key during index scan and copying it to - 'table->record' - - If this is too small, then index scans will be prefered over 'ref' - as with table scans there are no key read (KEY_LOOKUP_COST) and - fewer disk reads. -*/ -#define DEFAULT_KEY_COPY_COST ((double) 1.0 / 40.0) -#define KEY_COPY_COST optimizer_key_copy_cost -#define KEY_COPY_COST_THD(THD) ((THD)->variables.optimizer_key_copy_cost) - -/* - Cost of finding the next index entry and checking it against filter - This cost is very low as it's done inside the storage engine. - Should be smaller than KEY_COPY_COST. - */ -#define DEFAULT_KEY_NEXT_FIND_COST ((double) 1.0 / 80.0) -#define KEY_NEXT_FIND_COST optimizer_next_find_cost - -/** - The following is used to decide if MariaDB should use table scanning - instead of reading with keys. The number says how many evaluation of the - WHERE clause is comparable to reading one extra row from a table. -*/ -#define DEFAULT_WHERE_COST (1 / 5.0) -#define WHERE_COST optimizer_where_cost -#define WHERE_COST_THD(THD) ((THD)->variables.optimizer_where_cost) - -#define DEFAULT_KEY_COMPARE_COST (1 / 20.0) -#define KEY_COMPARE_COST optimizer_key_cmp_cost - -/* - Cost of comparing two rowids. This is set relative to KEY_COMPARE_COST - This is usally just a memcmp! -*/ -#define ROWID_COMPARE_COST KEY_COMPARE_COST/10.0 -#define ROWID_COMPARE_COST_THD(THD) ((THD)->variables.KEY_COMPARE_COST / 10.0) - -/* - Setup cost for different operations -*/ - -/* Extra cost for doing a range scan. Used to prefer 'ref' over range */ -#define MULTI_RANGE_READ_SETUP_COST (double) (1.0 / 50.0) - -/* - These costs are mainly to handle small tables, like the one we have in the - mtr test suite -*/ -/* Extra cost for full table scan. Used to prefer range over table scans */ -#define TABLE_SCAN_SETUP_COST 1.0 -/* Extra cost for full index scan. 
Used to prefer range over index scans */ -#define INDEX_SCAN_SETUP_COST 1.0 - -/* - The lower bound of accepted rows when using filter. - This is used to ensure that filters are not too agressive. -*/ -#define MIN_ROWS_AFTER_FILTERING 1.0 +#define JOIN_CACHE_ROW_COPY_COST_FACTOR(thd) 1.0 /* cost1 is better that cost2 only if cost1 + COST_EPS < cost2 @@ -163,33 +120,8 @@ static inline double cache_hit_ratio(uint ratio) when there are identical plans. Without COST_EPS some plans in the test suite would vary depending on floating point calculations done in different paths. - */ -#define COST_EPS 0.0001 - -/* - For sequential disk seeks the cost formula is: - DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip - - The cost of average seek - DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0. -*/ -#define DISK_SEEK_BASE_COST ((double)0.9) - -#define BLOCKS_IN_AVG_SEEK 128 - -#define DISK_SEEK_PROP_COST ((double)0.1/BLOCKS_IN_AVG_SEEK) - -/* - Subquery materialization-related constants */ -/* This should match ha_heap::read_time() */ -#define HEAP_TEMPTABLE_LOOKUP_COST 0.05 -#define HEAP_TEMPTABLE_CREATE_COST 1.0 -#define DISK_TEMPTABLE_LOOKUP_COST 1.0 -#define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 /* 2 tmp tables */ -#define DISK_TEMPTABLE_BLOCK_SIZE 8192 - -#define SORT_INDEX_CMP_COST 0.02 +#define COST_EPS 0.0000001 #define COST_MAX (DBL_MAX * (1.0 - DBL_EPSILON)) @@ -207,4 +139,22 @@ static inline double COST_MULT(double c, double f) return (COST_MAX / (f) > (c) ? 
(c) * (f) : COST_MAX); } +OPTIMIZER_COSTS *get_optimizer_costs(const LEX_CSTRING *cache_name); +OPTIMIZER_COSTS *create_optimizer_costs(const char *name, size_t length); +OPTIMIZER_COSTS *get_or_create_optimizer_costs(const char *name, + size_t length); +bool create_default_optimizer_costs(); +void copy_tmptable_optimizer_costs(); +void free_all_optimizer_costs(); +struct TABLE; + +extern "C" +{ + typedef int (*process_optimizer_costs_t) (const LEX_CSTRING *, + const OPTIMIZER_COSTS *, + TABLE *); + bool process_optimizer_costs(process_optimizer_costs_t func, TABLE *param); +} + + #endif /* OPTIMIZER_COSTS_INCLUDED */ diff --git a/sql/optimizer_defaults.h b/sql/optimizer_defaults.h new file mode 100644 index 00000000000..8d74bb91cc3 --- /dev/null +++ b/sql/optimizer_defaults.h @@ -0,0 +1,183 @@ +#ifndef OPTIMIZER_DEFAULTS_INCLUDED +#define OPTIMIZER_DEFAULTS_INCLUDED +/* + Copyright (c) 2022, MariaDB AB + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +/* + This file contains costs constants used by the optimizer + All costs should be based on milliseconds (1 cost = 1 ms) +*/ + +/* Cost for finding the first key in a key scan */ +#define DEFAULT_KEY_LOOKUP_COST ((double) 0.000435777) + +/* Cost of finding a row based on row_ID */ +#define DEFAULT_ROW_LOOKUP_COST ((double) 0.000130839) + +/* + Cost of finding and copying key and row blocks from the storage + engine index cache to an internal cache as part of an index + scan. This includes all mutexes that need to be taken to get + exclusive access to a page. The number is taken from accessing an + existing block from Aria page cache. + Used in handler::scan_time() and handler::keyread_time() +*/ +#define DEFAULT_INDEX_BLOCK_COPY_COST ((double) 3.56e-05) + +/* + Cost of copying a row to 'table->record'. + Used by scan_time() and rnd_pos_time() methods. + + If this is too small, then table scans will be preferred over 'ref' + as with table scans there are no key read (KEY_LOOKUP_COST), fewer + disk reads but more record copying and row comparisons. If it's + too big then MariaDB will use key lookup even when table scan is + better. +*/ +#define DEFAULT_ROW_COPY_COST ((double) 0.000060866) + +/* + Cost of copying the key to 'table->record' + + If this is too small, then, for small tables, index scans will be + preferred over 'ref' as with index scans there are fewer disk reads. +*/ +#define DEFAULT_KEY_COPY_COST ((double) 0.000015685) + +/* + Cost of finding the next index entry and checking its rowid against filter + This cost is very low as it's done inside the storage engine. + Should be smaller than KEY_COPY_COST. 
+ */ +#define DEFAULT_KEY_NEXT_FIND_COST ((double) 0.000082347) + +/* Cost of finding the next row when scanning a table */ +#define DEFAULT_ROW_NEXT_FIND_COST ((double) 0.000045916) + +/** + The cost of executing the WHERE clause as part of any row check. + Increasing this would force the optimizer to use row combinations + that read fewer rows. + The default cost comes from recording times from a simple where clause that + compares two fields (date and a double) with constants. +*/ +#define DEFAULT_WHERE_COST ((double) 3.2e-05) + +/* The cost of comparing a key when using range access or sorting */ +#define DEFAULT_KEY_COMPARE_COST 0.000011361 + +/* Rowid compare is usually just a single memcmp of a short string */ +#define DEFAULT_ROWID_COMPARE_COST 0.000002653 +/* Rowid copy is usually just a single memcpy of a short string */ +#define DEFAULT_ROWID_COPY_COST 0.000002653 + +/* + Average disk seek time on a hard disk is 8-10 ms, which is also + about the time to read an IO_SIZE (8192) block. + + A medium ssd is about 400MB/second, which gives us the time for + reading an IO_SIZE block to IO_SIZE/400000000 = 0.0000204 sec= 0.02 ms. +*/ +#define DEFAULT_DISK_READ_COST ((double) IO_SIZE / 400000000.0 * 1000) + +/* + The following is an old comment for hard-disks, please ignore the + following, except if you like history: + + For sequential hard disk seeks the cost formula is: + DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip + + The cost of average seek + DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK = 10. +*/ + + +/* + The table/index cache_miss/total_cache_request ratio. + 1.0 means that a searched for key or row will never be in the cache while + 0.0 means it is always in the cache (and we don't have to do any disk reads). + + According to folklore, one should not have to access disk for more + than 20% of the cache request for MariaDB to run very well. 
+ However in practice when we read rows or keys in a query, we will often + read the same row over and over again. Because of this we set + DEFAULT_DISK_READ_RATIO to 0.20/10 = 0.02. + + Increasing DISK_READ_RATIO will make MariaDB prefer key lookup over + table scans as the impact of ROW_COPY_COST and INDEX_COPY cost will + have a larger impact when more rows are examined. + + We are not yet taking into account cache usage statistics as this + could confuse users as the EXPLAIN and costs for a query would change + between two query calls, which may confuse users (and also make the + mtr tests very unpredictable). + + Note that the engine's avg_io_cost() (DEFAULT_DISK_READ_COST by default) + is multiplied with this constant! +*/ + +#define DEFAULT_DISK_READ_RATIO 0.02 + +/* + The following costs are mainly to ensure we don't do table and index + scans for small tables, like the one we have in the mtr test suite. + + This is mostly to keep the mtr tests using indexes (as the optimizer would + if the tables are large). It will also ensure that EXPLAIN is showing + more key usage for users where they are testing queries with small tables + at the start of projects. + This is probably OK for most as the execution time difference between table + scan and index scan compared to key lookups is so small when using small + tables. It also helps to fill the index cache which will help mitigate + the speed difference. +*/ + +/* + Extra cost for full table and index scan. Used to prefer key and range + over index and table scans + + INDEX_SCAN_SETUP_COST (defined in optimizer_costs.h) is half of + table_scan_setup_cost to get the optimizer to prefer index scans to table + scans as key copy is faster than row copy and index blocks provide + more information in the cache. + + This will also help MyISAM as with MyISAM the table scans have a cost + very close to index scans (they are fast but require a read call + that we want to avoid even if it's small). 
+ + 10 usec is about 10 MyISAM row lookups with optimizer_disk_read_ratio= 0.02 +*/ +#define DEFAULT_TABLE_SCAN_SETUP_COST 0.01 // 10 usec + +/* Extra cost for doing a range scan. Used to prefer 'ref' over range */ +#define MULTI_RANGE_READ_SETUP_COST KEY_LOOKUP_COST + +/* + Temporary file and temporary table related costs + Used with subquery materialization, derived tables etc +*/ + +#define TMPFILE_CREATE_COST 0.5 // Cost of creating and deleting files +#define HEAP_TEMPTABLE_CREATE_COST 0.025 // ms +/* Cost taken from HEAP_LOOKUP_COST in ha_heap.cc */ +#define HEAP_TEMPTABLE_LOOKUP_COST (0.00016097*1000 + heap_optimizer_costs.row_copy_cost) +#define DISK_TEMPTABLE_LOOKUP_COST(thd) (tmp_table_optimizer_costs.key_lookup_cost + tmp_table_optimizer_costs.row_lookup_cost + tmp_table_optimizer_costs.row_copy_cost) +#define DISK_TEMPTABLE_CREATE_COST TMPFILE_CREATE_COST*2 // 2 tmp tables +#define DISK_TEMPTABLE_BLOCK_SIZE IO_SIZE + +#endif /* OPTIMIZER_DEFAULTS_INCLUDED */ diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc index c0f7fe0755a..4f713edb47f 100644 --- a/sql/rowid_filter.cc +++ b/sql/rowid_filter.cc @@ -32,7 +32,7 @@ lookup_cost(Rowid_filter_container_type cont_type) { switch (cont_type) { case SORTED_ARRAY_CONTAINER: - return log(est_elements)*0.01+key_next_find_cost; + return log(est_elements) * rowid_compare_cost + base_lookup_cost; default: DBUG_ASSERT(0); return 0; @@ -125,11 +125,13 @@ void Range_rowid_filter_cost_info::init(Rowid_filter_container_type cont_type, key_no= idx; est_elements= (ulonglong) table->opt_range[key_no].rows; cost_of_building_range_filter= build_cost(container_type); + where_cost= tab->in_use->variables.optimizer_where_cost; - key_next_find_cost= tab->in_use->variables.optimizer_key_next_find_cost; + base_lookup_cost= tab->file->ROW_NEXT_FIND_COST; + rowid_compare_cost= tab->file->ROWID_COMPARE_COST; selectivity= est_elements/((double) table->stat_records()); gain= avg_access_and_eval_gain_per_row(container_type, - 
tab->file->optimizer_cache_cost); + tab->file->ROW_LOOKUP_COST); if (gain > 0) cross_x= cost_of_building_range_filter/gain; else @@ -147,15 +149,18 @@ double Range_rowid_filter_cost_info::build_cost(Rowid_filter_container_type cont_type) { double cost; + OPTIMIZER_COSTS *costs= &table->s->optimizer_costs; DBUG_ASSERT(table->opt_range_keys.is_set(key_no)); - cost= table->opt_range[key_no].index_only_fetch_cost(table->in_use); + /* Cost of fetching keys */ + cost= table->opt_range[key_no].index_only_fetch_cost(table); switch (cont_type) { - case SORTED_ARRAY_CONTAINER: - cost+= ARRAY_WRITE_COST * est_elements; /* cost filling the container */ - cost+= ARRAY_SORT_C * est_elements * log(est_elements); /* sorting cost */ + /* Add cost of filling container and cost of sorting */ + cost= (est_elements * + (costs->rowid_copy_cost + // Copying rowid + costs->rowid_cmp_cost * log2(est_elements))); // Sort break; default: DBUG_ASSERT(0); diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h index 866b52b156b..46664c18faa 100644 --- a/sql/rowid_filter.h +++ b/sql/rowid_filter.h @@ -143,20 +143,6 @@ class SQL_SELECT; class Rowid_filter_container; class Range_rowid_filter_cost_info; -/* - Cost to write rowid into array. Assume inserting 1000 row id's into the - array has same cost as a 'disk io' or key fetch -*/ -#define ARRAY_WRITE_COST 0.001 -/* - Factor used to calculate cost of sorting rowids in array - This is multiplied by 'elements * log(elements)', so this factor - has a very high cost weight! - A value of 0.001 will have 200 rows have a cost of 1.05 and - 1000 rows a cost of 6.90. 
-*/ -#define ARRAY_SORT_C 0.001 - typedef enum { SORTED_ARRAY_CONTAINER, @@ -395,7 +381,8 @@ class Range_rowid_filter_cost_info final: public Sql_alloc /* The index whose range scan would be used to build the range filter */ uint key_no; double cost_of_building_range_filter; - double where_cost, key_next_find_cost; + double where_cost, base_lookup_cost, rowid_compare_cost; + /* (gain*row_combinations)-cost_of_building_range_filter yields the gain of the filter for 'row_combinations' key tuples of the index key_no diff --git a/sql/set_var.cc b/sql/set_var.cc index 8cb5fcd4870..274ee07f07d 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -310,7 +310,13 @@ do { \ case SHOW_HA_ROWS: do_num_val (ha_rows,CMD); #define case_for_double(CMD) \ - case SHOW_DOUBLE: do_num_val (double,CMD) + case SHOW_DOUBLE: do_num_val (double,CMD); \ + case SHOW_OPTIMIZER_COST: \ + { \ + double val= ((*(double*) value) == OPTIMIZER_COST_UNDEF ? OPTIMIZER_COST_UNDEF : \ + (*(double*) value) * 1000); \ + CMD; \ + } while (0) #define case_get_string_as_lex_string \ case SHOW_CHAR: \ diff --git a/sql/set_var.h b/sql/set_var.h index 570703a8222..38a395adf0f 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -84,7 +84,7 @@ protected: typedef bool (*on_update_function)(sys_var *self, THD *thd, enum_var_type type); int flags; ///< or'ed flag_enum values - const SHOW_TYPE show_val_type; ///< what value_ptr() returns for sql_show.cc + SHOW_TYPE show_val_type; ///< what value_ptr() returns for sql_show.cc PolyLock *guard; ///< *second* lock that protects the variable ptrdiff_t offset; ///< offset to the value from global_system_variables on_check_function on_check; diff --git a/sql/sql_bitmap.h b/sql/sql_bitmap.h index 02dc8198c7c..61b3df2d086 100644 --- a/sql/sql_bitmap.h +++ b/sql/sql_bitmap.h @@ -270,13 +270,21 @@ public: { return buffer[0]; } - uint bits_set() + uint bits_set() const { uint res= 0; for (size_t i= 0; i < ARRAY_ELEMENTS; i++) - res += my_count_bits(buffer[i]); + if (buffer[i]) + 
res+= my_count_bits(buffer[i]); return res; } + uint find_first_bit() const + { + for (size_t i= 0; i < ARRAY_ELEMENTS; i++) + if (buffer[i]) + return (uint)i*BITS_PER_ELEMENT + my_find_first_bit(buffer[i]); + return width; + } class Iterator { const Bitmap& map; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 4074481880c..9b064b617cb 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1236,7 +1236,6 @@ void THD::init() */ variables.pseudo_thread_id= thread_id; variables.default_master_connection.str= default_master_connection_buff; - optimizer_cache_hit_ratio= cache_hit_ratio(variables.optimizer_cache_hit_ratio); ::strmake(default_master_connection_buff, global_system_variables.default_master_connection.str, variables.default_master_connection.length); diff --git a/sql/sql_class.h b/sql/sql_class.h index 845b1bad024..26d64c28fe2 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -692,9 +692,7 @@ typedef struct system_variables ulonglong slave_skip_counter; ulonglong max_relay_log_size; - double optimizer_index_block_copy_cost, optimizer_key_next_find_cost; - double optimizer_row_copy_cost, optimizer_key_copy_cost; - double optimizer_where_cost, optimizer_key_cmp_cost; + double optimizer_where_cost, optimizer_scan_setup_cost; double long_query_time_double, max_statement_time_double; double sample_percentage; @@ -793,7 +791,6 @@ typedef struct system_variables uint group_concat_max_len; uint eq_range_index_dive_limit; - uint optimizer_cache_hit_ratio; // Stored in handler::optimizer_cache_cost uint idle_transaction_timeout; uint idle_readonly_transaction_timeout; uint idle_write_transaction_timeout; @@ -831,7 +828,6 @@ typedef struct system_variables my_bool session_track_user_variables; #endif // USER_VAR_TRACKING my_bool tcp_nodelay; - plugin_ref table_plugin; plugin_ref tmp_table_plugin; plugin_ref enforced_table_plugin; @@ -2677,7 +2673,6 @@ public: struct system_status_var org_status_var; // For user statistics struct system_status_var 
*initial_status_var; /* used by show status */ THR_LOCK_INFO lock_info; // Locking info of this thread - double optimizer_cache_hit_ratio; // From optimizer_cache_hit_ratio /** Protects THD data accessed from other threads: @@ -7426,6 +7421,13 @@ inline void handler::decrement_statistics(ulong SSV::*offset) const status_var_decrement(table->in_use->status_var.*offset); } +/* Update references in the handler to the table */ + +inline void handler::set_table(TABLE* table_arg) +{ + table= table_arg; + costs= &table_arg->s->optimizer_costs; +} inline int handler::ha_ft_read(uchar *buf) { diff --git a/sql/sql_const.h b/sql/sql_const.h index 98803989f51..11aadd11174 100644 --- a/sql/sql_const.h +++ b/sql/sql_const.h @@ -121,11 +121,11 @@ /* This is used when reading large blocks, sequential read. - We assume that reading this much will be the same cost as 1 seek / fetching - one row from the storage engine. + We assume that reading this much will be roughly the same cost as 1 + seek / fetching one row from the storage engine. + Cost of one read of DISK_CHUNK_SIZE is DISK_SEEK_BASE_COST (ms). */ #define DISK_CHUNK_SIZE (uint) (65536) /* Size of diskbuffer for tmpfiles */ -#define TMPFILE_CREATE_COST 2.0 /* Creating and deleting tmp file */ #define FRM_VER_TRUE_VARCHAR (FRM_VER+4) /* 10 */ #define FRM_VER_EXPRESSSIONS (FRM_VER+5) /* 11 */ @@ -204,8 +204,14 @@ #define MIN_ROWS_TO_USE_TABLE_CACHE 100 #define MIN_ROWS_TO_USE_BULK_INSERT 100 +/* + The lower bound of accepted rows when using filter. + This is used to ensure that filters are not too agressive. +*/ +#define MIN_ROWS_AFTER_FILTERING 1.0 + /** - Number of rows in a reference table when refereed through a not unique key. + Number of rows in a reference table when refered through a not unique key. This value is only used when we don't know anything about the key distribution. 
*/ diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index ede486fc297..9f907c9ed2c 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -1368,10 +1368,12 @@ double Explain_table_access::get_r_filtered() } -int Explain_table_access::print_explain(select_result_sink *output, uint8 explain_flags, +int Explain_table_access::print_explain(select_result_sink *output, + uint8 explain_flags, bool is_analyze, uint select_id, const char *select_type, - bool using_temporary, bool using_filesort) + bool using_temporary, + bool using_filesort) { THD *thd= output->thd; // note: for SHOW EXPLAIN, this is target thd. MEM_ROOT *mem_root= thd->mem_root; @@ -1999,6 +2001,9 @@ void Explain_table_access::print_explain_json(Explain_query *query, writer->add_double(jbuf_tracker.get_filtered_after_where()*100.0); else writer->add_null(); + + writer->add_member("r_unpack_time_ms"); + writer->add_double(jbuf_unpack_tracker.get_time_ms()); } } diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 38c5c3e6595..42a1c360e5b 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -753,7 +753,7 @@ public: class Explain_table_access : public Sql_alloc { public: - Explain_table_access(MEM_ROOT *root) : + Explain_table_access(MEM_ROOT *root, bool timed) : derived_select_number(0), non_merged_sjm_number(0), extra_tags(root), @@ -766,6 +766,7 @@ public: pushed_index_cond(NULL), sjm_nest(NULL), pre_join_sort(NULL), + jbuf_unpack_tracker(timed), rowid_filter(NULL) {} ~Explain_table_access() { delete sjm_nest; } @@ -874,6 +875,7 @@ public: Gap_time_tracker extra_time_tracker; Table_access_tracker jbuf_tracker; + Time_and_counter_tracker jbuf_unpack_tracker; Explain_rowid_filter *rowid_filter; diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 1347e38753d..5fcfe0e0e0a 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -1600,6 +1600,7 @@ bool JOIN_CACHE::put_record() bool JOIN_CACHE::get_record() { bool res; + ANALYZE_START_TRACKING(thd(), 
join_tab->jbuf_unpack_tracker); uchar *prev_rec_ptr= 0; if (with_length) pos+= size_of_rec_len; @@ -1615,6 +1616,7 @@ bool JOIN_CACHE::get_record() if (prev_cache) prev_cache->get_record_by_pos(prev_rec_ptr); } + ANALYZE_STOP_TRACKING(thd(), join_tab->jbuf_unpack_tracker); return res; } diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h index d4df8c6468f..df5cd37c3c6 100644 --- a/sql/sql_plugin.h +++ b/sql/sql_plugin.h @@ -24,6 +24,7 @@ #define SHOW_always_last SHOW_KEY_CACHE_LONG, \ SHOW_HAVE, SHOW_MY_BOOL, SHOW_HA_ROWS, SHOW_SYS, \ SHOW_LONG_NOFLUSH, SHOW_LEX_STRING, SHOW_ATOMIC_COUNTER_UINT32_T, \ + SHOW_OPTIMIZER_COST, \ /* SHOW_*_STATUS must be at the end, SHOW_LONG_STATUS being first */ \ SHOW_LONG_STATUS, SHOW_DOUBLE_STATUS, SHOW_LONGLONG_STATUS, \ SHOW_UINT32_STATUS diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 0532c6c000c..9a1dfd83508 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -47,6 +47,7 @@ // print_sjm, print_plan, TEST_join #include "records.h" // init_read_record, end_read_record #include "filesort.h" // filesort_free_buffers +#include "filesort_utils.h" // get_qsort_sort_cost #include "sql_union.h" // mysql_union #include "opt_subselect.h" #include "sql_derived.h" @@ -68,6 +69,7 @@ #include "my_json_writer.h" #include "opt_trace.h" #include "create_tmp_table.h" +#include "optimizer_defaults.h" /* A key part number that means we're using a fulltext scan. @@ -99,14 +101,7 @@ #define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) < 1.0000001) -#define double_to_rows(A) ((A) >= ((double)HA_POS_ERROR) ? HA_POS_ERROR : (ha_rows) (A)) - -/* Cost for reading a row through an index */ -struct INDEX_READ_COST -{ - double read_cost; - double index_only_cost; -}; +#define double_to_rows(A) ((A) >= ((double)HA_ROWS_MAX) ? 
HA_ROWS_MAX : (ha_rows) (A)) const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", "MAYBE_REF","ALL","range","index","fulltext", @@ -257,7 +252,6 @@ static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond, bool is_top_and_level); static Item* part_of_refkey(TABLE *form,Field *field); -uint find_shortest_key(TABLE *table, const key_map *usable_keys); static bool test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, key_map usable_keys, int key, @@ -331,7 +325,8 @@ static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *, List<Item> &, List<Item> &, bool, bool, bool); static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, - table_map rem_tables); + table_map rem_tables, + double *records_out); void set_postjoin_aggr_write_func(JOIN_TAB *tab); static Item **get_sargable_cond(JOIN *join, TABLE *table); @@ -433,7 +428,7 @@ bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value) POSITION::POSITION() { table= 0; - records_read= cond_selectivity= read_time= records_out= 0.0; + records_read= cond_selectivity= read_time= records_out= records_init= 0.0; prefix_record_count= 0.0; key= 0; forced_index= 0; @@ -1896,6 +1891,13 @@ int JOIN::optimize() res= build_explain(); optimization_state= JOIN::OPTIMIZATION_DONE; } + + /* + Store the cost of this query into a user variable + TODO: calculate a correct cost for a query with subqueries and UNIONs. 
+ */ + if (select_lex->select_number == 1) + thd->status_var.last_query_cost= best_read; return res; } @@ -2045,6 +2047,7 @@ JOIN::optimize_inner() { DBUG_ENTER("JOIN::optimize_inner"); subq_exit_fl= false; + best_read= 0.0; DEBUG_SYNC(thd, "before_join_optimize"); THD_STAGE_INFO(thd, stage_optimizing); @@ -3588,7 +3591,7 @@ bool JOIN::make_aggr_tables_info() TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param, all_fields, NULL, distinct, - TRUE, select_options, HA_POS_ERROR, + TRUE, select_options, HA_ROWS_MAX, &empty_clex_str, !need_tmp, keep_row_order); if (!table) @@ -4233,7 +4236,7 @@ bool JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) { tab->filesort= - new (thd->mem_root) Filesort(order, HA_POS_ERROR, tab->keep_current_rowid, + new (thd->mem_root) Filesort(order, HA_ROWS_MAX, tab->keep_current_rowid, tab->select); if (!tab->filesort) return true; @@ -5270,7 +5273,6 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, DYNAMIC_ARRAY *keyuse_array) { int error= 0; - TABLE *UNINIT_VAR(table); /* inited in all loops */ uint i,table_count,const_count,key; uint sort_space; table_map found_const_table_map, all_table_map; @@ -5331,8 +5333,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (s= stat, i= 0; (tables= ti++); s++, i++) { TABLE_LIST *embedding= tables->embedding; + TABLE *table= tables->table; stat_vector[i]=s; - table_vector[i]=s->table=table=tables->table; + table_vector[i]= s->table= table; s->tab_list= tables; table->pos_in_table_list= tables; error= tables->fetch_number_of_rows(); @@ -5465,7 +5468,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (s= stat ; s < stat_end ; s++) { - table= s->table; + TABLE *table= s->table; for (JOIN_TAB *t= stat ; t < stat_end ; t++) { if (t->dependent & table->map) @@ -5569,7 +5572,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++) { - table=s->table; + TABLE 
*table= s->table; if (table->is_filled_at_execution()) continue; @@ -5622,7 +5625,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, (*s->on_expr_ref)->is_expensive())) { // system table int tmp= 0; - s->type=JT_SYSTEM; + s->type= JT_SYSTEM; join->const_table_map|=table->map; set_position(join,const_count++,s,(KEYUSE*) 0); if ((tmp= join_read_const_table(join->thd, s, @@ -5825,19 +5828,20 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->startup_cost= 0; if (s->type == JT_SYSTEM || s->type == JT_CONST) { - Json_writer_object table_records(thd); - /* Only one matching row */ - s->found_records= s->records= 1; - s->records_out= 1.0; + ha_rows records= 1; + if (s->type == JT_SYSTEM || s->table->file->stats.records == 0) + records= s->table->file->stats.records; + /* zero or one matching row */ + s->records= s->found_records= records; + s->records_init= s->records_out= rows2double(records); s->read_time=1.0; s->worst_seeks=1.0; - table_records.add_table_name(s) - .add("rows", s->found_records) - .add("cost", s->read_time) - .add("table_type", s->type == JT_CONST ? - "const" : - "system"); + table_records.add_table_name(s). + add("rows", s->found_records). + add("cost", s->read_time). + add("table_type", s->type == JT_CONST ? 
+ "const" : "system"); continue; } /* @@ -5889,7 +5893,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->table->pos_in_table_list->is_materialized_derived())) // (3) { bool impossible_range= FALSE; - ha_rows records= HA_POS_ERROR; + ha_rows records= HA_ROWS_MAX; SQL_SELECT *select= 0; Item **sargable_cond= NULL; if (!s->const_keys.is_clear_all()) @@ -5956,6 +5960,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, } else { + double records= 1; join->const_table_map|= s->table->map; set_position(join,const_count++,s,(KEYUSE*) 0); s->type= JT_CONST; @@ -5966,7 +5971,10 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->info= ET_IMPOSSIBLE_ON_CONDITION; found_const_table_map|= s->table->map; mark_as_null_row(s->table); // All fields are NULL + records= 0; } + s->records_init= s->records_out= records; + s->found_records= s->records= (ha_rows)records; } } if (records != HA_POS_ERROR) @@ -6055,7 +6063,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, for (i= 0; i < join->table_count ; i++) if (double rr= join->best_positions[i].records_read) records= COST_MULT(records, rr); - rows= records > (double) HA_ROWS_MAX ? 
HA_ROWS_MAX : (ha_rows) records; + rows= double_to_rows(records); set_if_smaller(rows, unit->lim.get_select_limit()); join->select_lex->increase_derived_records(rows); } @@ -7697,8 +7705,9 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) { join->positions[idx].table= table; join->positions[idx].key=key; - join->positions[idx].records_read=1.0; /* This is a const table */ - join->positions[idx].records_out=1.0; /* This is a const table */ + join->positions[idx].records_read=1.0; /* This is a const table */ + join->positions[idx].records_out=1.0; /* This is a const table */ + join->positions[idx].records_init=1.0; /* This is a const table */ join->positions[idx].cond_selectivity= 1.0; join->positions[idx].ref_depend_map= 0; @@ -7751,7 +7760,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) TODO: Extend with_found_constraint' to be set for a top level expression of type X=Y where X and Y has fields from current table and at least one field from - one o more previous tables. + one or more previous tables. @see also table_after_join_selectivity() produces selectivity of condition that is @@ -7851,37 +7860,29 @@ INDEX_READ_COST cost_for_index_read(const THD *thd, const TABLE *table, DBUG_ENTER("cost_for_index_read"); rows_adjusted= MY_MIN(rows2double(records), (double) thd->variables.max_seeks_for_key); + set_if_bigger(rows_adjusted, 1); + #ifdef OLD_CODE_LIMITED_SEEKS set_if_smaller(rows_adjusted, worst_seeks); #endif if (file->is_clustering_key(key)) { - cost.index_only_cost= file->ha_read_time(key, 1, (ha_rows)rows_adjusted); - /* - Same computation as in ha_read_and_copy_time() - We do it explicitely here as we want to use the original value of - records to compute the record copy cost. 
- */ - cost.read_cost= (cost.index_only_cost + - rows2double(records) * ROW_COPY_COST_THD(thd)); + cost.index_only_cost= + file->ha_keyread_clustered_and_copy_time(key, 1, rows_adjusted, 0); + /* There is no 'index_only_read' with a clustered index */ + cost.read_cost= cost.index_only_cost; } else if (table->covering_keys.is_set(key) && !table->no_keyread) { - cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows)rows_adjusted); + cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); /* Same computation as in ha_keyread_and_copy_time() */ cost.read_cost= (cost.index_only_cost + - rows2double(records) * KEY_COPY_COST_THD(thd)); + rows2double(records) * file->KEY_COPY_COST); } else { - cost.index_only_cost= file->ha_keyread_time(key, 1, (ha_rows) rows_adjusted); - /* - Note that ha_read_time() + ..ROW_COPY_COST should be same - as ha_rnd_pos_time(). - */ - cost.read_cost= (cost.index_only_cost + - file->ha_read_time(key, 0, (ha_rows)rows_adjusted) + - rows2double(records) * ROW_COPY_COST_THD(thd)); + cost.index_only_cost= file->ha_keyread_time(key, 1, rows_adjusted, 0); + cost.read_cost= (cost.index_only_cost + file->ha_rnd_pos_time(records)); } DBUG_PRINT("statistics", ("index_cost: %.3f full_cost: %.3f", cost.index_only_cost, cost.read_cost)); @@ -7950,8 +7951,8 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, read even if selectivity (and thus new_records) would be very low. 
*/ new_cost= (MY_MAX(cost_of_accepted_rows, - ranges * KEY_LOOKUP_COST * io_cost * - table->file->optimizer_cache_cost) + + ranges * table->file->KEY_LOOKUP_COST + + ranges * io_cost * table->file->DISK_READ_RATIO) + cost_of_rejected_rows + filter_lookup_cost); new_total_cost= ((new_cost + new_records * WHERE_COST_THD(thd)) * prev_records + filter_startup_cost); @@ -8015,6 +8016,24 @@ apply_filter(THD *thd, TABLE *table, double *cost, double *records_arg, None */ +struct best_plan +{ + double cost; // Smallest cost found + double records; // Old 'Records' + double records_read; // Records accessed + double records_after_filter; // Records_read + filter + double records_out; // Smallest record count seen + Range_rowid_filter_cost_info *filter; // Best filter + KEYUSE *key; // Best key + SplM_plan_info *spl_plan; + table_map ref_depends_map; + enum join_type type; + uint forced_index; + uint max_key_part; + bool uses_jbuf; +}; + + void best_access_path(JOIN *join, JOIN_TAB *s, @@ -8030,14 +8049,7 @@ best_access_path(JOIN *join, uint use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity; TABLE *table= s->table; - KEYUSE *best_key= 0; - uint best_max_key_part= 0; - uint best_forced_index= MAX_KEY, forced_index= MAX_KEY; my_bool found_constraint= 0; - double best_cost= DBL_MAX; - double records= DBL_MAX; - double records_out= table->stat_records() * table->cond_selectivity; - table_map best_ref_depends_map= 0; /* key_dependent is 0 if all key parts could be used or if there was an EQ_REF table found (which uses all key parts). In other words, we cannot @@ -8045,18 +8057,29 @@ best_access_path(JOIN *join, Otherwise it's a bitmap of tables that could improve key usage. 
*/ table_map key_dependent= 0; - Range_rowid_filter_cost_info *best_filter= 0; double tmp; ha_rows rec; - bool best_uses_jbuf= FALSE; MY_BITMAP *eq_join_set= &s->table->eq_join_set; KEYUSE *hj_start_key= 0; - SplM_plan_info *spl_plan= 0; - enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN; Loose_scan_opt loose_scan_opt; + struct best_plan best; Json_writer_object trace_wrapper(thd, "best_access_path"); DBUG_ENTER("best_access_path"); + best.cost= DBL_MAX; + best.records= DBL_MAX; + best.records_read= DBL_MAX; + best.records_after_filter= DBL_MAX; + best.records_out= table->stat_records() * table->cond_selectivity; + best.filter= 0; + best.key= 0; + best.max_key_part= 0; + best.type= JT_UNKNOWN; + best.forced_index= MAX_KEY; + best.ref_depends_map= 0; + best.uses_jbuf= FALSE; + best.spl_plan= 0; + disable_jbuf= disable_jbuf || idx == join->const_tables; trace_wrapper.add_table_name(s); @@ -8066,7 +8089,7 @@ best_access_path(JOIN *join, loose_scan_opt.init(join, s, remaining_tables); if (table->is_splittable()) - spl_plan= s->choose_best_splitting(record_count, remaining_tables); + best.spl_plan= s->choose_best_splitting(record_count, remaining_tables); if (unlikely(thd->trace_started())) { @@ -8077,10 +8100,10 @@ best_access_path(JOIN *join, if (s->keyuse) { /* Use key if possible */ - KEYUSE *keyuse; - KEYUSE *start_key=0; - double best_records= DBL_MAX, index_only_cost= DBL_MAX; + KEYUSE *keyuse, *start_key= 0; + double index_only_cost= DBL_MAX; uint max_key_part=0; + enum join_type type= JT_UNKNOWN; /* Test how we can use keys */ rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key @@ -8102,7 +8125,7 @@ best_access_path(JOIN *join, key_part_map ref_or_null_part= 0; key_part_map all_parts= 0; double startup_cost= s->startup_cost; - double records_after_filter; + double records_after_filter, records_best_filter, records; Range_rowid_filter_cost_info *filter= 0; if (is_hash_join_key_no(key)) @@ -8333,7 +8356,6 @@ best_access_path(JOIN *join, 
((double) (table->s->max_key_length-keyinfo->key_length) / (double) table->s->max_key_length))); set_if_smaller(records, (double)s->records); - set_if_smaller(records_out, records); if (records < 2.0) records=2.0; /* Can't be as good as a unique */ } @@ -8400,6 +8422,8 @@ best_access_path(JOIN *join, (!(table->file->index_flags(key, 0, 0) & HA_ONLY_WHOLE_INDEX) || found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts))) { + double extra_cost= 0; + max_key_part= max_part_bit(found_part); /* ReuseRangeEstimateForRef-3: @@ -8524,7 +8548,7 @@ best_access_path(JOIN *join, a*keyinfo->user_defined_key_parts - rec_per_key)/ (keyinfo->user_defined_key_parts-1); else - records= a; + records= rows2double(s->records); set_if_bigger(records, MIN_ROWS_AFTER_FILTERING); } } @@ -8533,6 +8557,7 @@ best_access_path(JOIN *join, { /* We need to do two key searches to find row */ records *= 2.0; + extra_cost= s->table->file->KEY_LOOKUP_COST; } /* @@ -8562,13 +8587,14 @@ best_access_path(JOIN *join, } /* Limit the number of matched rows */ + set_if_smaller(records, (double) s->records); tmp= records; set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key); INDEX_READ_COST cost= cost_for_index_read(thd, table, key, (ha_rows) tmp, (ha_rows) s->worst_seeks); tmp= cost.read_cost; - index_only_cost= cost.index_only_cost; + index_only_cost= cost.index_only_cost+extra_cost; } else { @@ -8590,7 +8616,7 @@ best_access_path(JOIN *join, if (records == DBL_MAX) // Key not usable continue; - records_after_filter= records; + records_best_filter= records_after_filter= records; /* Check that start_key->key can be used for index access @@ -8604,7 +8630,8 @@ best_access_path(JOIN *join, tmp, index_only_cost, record_count, - &records_out); + &records_best_filter); + set_if_smaller(best.records_out, records_best_filter); if (filter) filter= filter->apply_filter(thd, table, &tmp, &records_after_filter, &startup_cost, @@ -8625,20 +8652,31 @@ best_access_path(JOIN *join, The COST_EPS is here to 
ensure we use the first key if there are two 'identical keys' that could be used. */ - if (tmp + COST_EPS < best_cost) + if (tmp + COST_EPS < best.cost) { trace_access_idx.add("chosen", true); - best_cost= tmp; + best.cost= tmp; /* We use 'records' instead of 'records_after_filter' here as we want to have EXPLAIN print the number of rows found by the key access. */ - best_records= records; // Records before filter! - best_key= start_key; - best_max_key_part= max_key_part; - best_ref_depends_map= found_ref; - best_filter= filter; - best_type= type; + best.records= records; // Records before filter! + best.records_read= records; + /* + If we are using 'use_cond_selectivity > 1' then + table_after_join_selectivity() may take into account other + filters that what is currently used so we have to use + records_after_filter. If 'use_cond_selectivity <= 1 then we + can use information from the best filter. + */ + best.records_after_filter= ((use_cond_selectivity > 1) ? + records_after_filter : + records_best_filter); + best.key= start_key; + best.max_key_part= max_key_part; + best.ref_depends_map= found_ref; + best.filter= filter; + best.type= type; } else if (unlikely(thd->trace_started())) { @@ -8646,9 +8684,8 @@ best_access_path(JOIN *join, add("chosen", false). add("cause", cause ? 
cause : "cost"); } - set_if_smaller(records_out, records); + set_if_smaller(best.records_out, records); } /* for each key */ - records= best_records; } else { @@ -8671,7 +8708,7 @@ best_access_path(JOIN *join, /* Add dependency for sub queries */ key_dependent|= s->embedded_dependent; - } /* if (s->keyuse) */ + } /* if (s->keyuse) */ /* Check that s->key_dependent contains all used_tables found in s->keyuse */ @@ -8687,7 +8724,7 @@ best_access_path(JOIN *join, (1) s is inner table of semi-join -> join cache is allowed for semijoins (2) s is inner table of outer join -> join cache is allowed for outer joins */ - if (idx > join->const_tables && best_key == 0 && + if (idx > join->const_tables && best.key == 0 && (join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) && join->max_allowed_join_cache_level > 2 && !bitmap_is_clear_all(eq_join_set) && !disable_jbuf && @@ -8696,11 +8733,11 @@ best_access_path(JOIN *join, (!(table->map & join->outer_join) || join->allowed_outer_join_with_cache)) // (2) { - double refills, cmp_time; + double refills, row_copy_cost, cmp_time; /* Estimate the cost of the hash join access to the table */ - double rnd_records= matching_candidates_in_table(s, found_constraint, + double rnd_records= matching_candidates_in_table(s, 0, use_cond_selectivity); - set_if_smaller(records_out, rnd_records); + set_if_smaller(best.records_out, rnd_records); /* The following cost calculation is identical to the cost calculation for @@ -8729,18 +8766,22 @@ best_access_path(JOIN *join, We assume here that, thanks to the hash, we don't have to compare all row combinations, only a HASH_FANOUT (10%) rows in the cache. 
*/ - cmp_time= (rnd_records * record_count * HASH_FANOUT * - (ROW_COPY_COST_THD(thd) * JOIN_CACHE_ROW_COPY_COST_FACTOR + + row_copy_cost= (ROW_COPY_COST_THD(thd) * 2 * + JOIN_CACHE_ROW_COPY_COST_FACTOR(thd)); + cmp_time= (record_count * row_copy_cost + + rnd_records * record_count * HASH_FANOUT * + ((idx - join->const_tables) * row_copy_cost + WHERE_COST_THD(thd))); tmp= COST_ADD(tmp, cmp_time); - best_cost= tmp; - records= rnd_records; - best_key= hj_start_key; - best_ref_depends_map= 0; - best_uses_jbuf= TRUE; - best_filter= 0; - best_type= JT_HASH; + best.cost= tmp; + best.records_read= best.records_after_filter= rows2double(s->records); + best.records= rnd_records; + best.key= hj_start_key; + best.ref_depends_map= 0; + best.uses_jbuf= TRUE; + best.filter= 0; + best.type= JT_HASH; Json_writer_object trace_access_hash(thd); if (unlikely(trace_access_hash.trace_started())) trace_access_hash. @@ -8748,7 +8789,7 @@ best_access_path(JOIN *join, add("index", "hj-key"). add("rows", rnd_records). add("refills", refills). - add("cost", best_cost). + add("cost", best.cost). add("chosen", true); } @@ -8788,21 +8829,25 @@ best_access_path(JOIN *join, be used for cases with small datasets, which is annoying. 
*/ Json_writer_object trace_access_scan(thd); - if ((records >= s->found_records || best_cost > s->read_time) && // (1) - !(best_key && best_key->key == MAX_KEY) && // (2) + if ((best.records_read >= s->found_records || + best.cost > s->read_time) && // (1) + !(best.key && best.key->key == MAX_KEY) && // (2) !(s->quick && s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2) - best_key && s->quick->index == best_key->key && // (2) - best_max_key_part >= table->opt_range[best_key->key].key_parts) &&// (2) - !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) - !table->covering_keys.is_clear_all() && best_key && !s->quick) &&// (3) - !(table->force_index_join && best_key && !s->quick) && // (4) - !(best_key && table->pos_in_table_list->jtbm_subselect)) // (5) + best.key && s->quick->index == best.key->key && // (2) + best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2) + !((table->file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3) + !table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3) + !(table->force_index_join && best.key && !s->quick) && // (4) + !(best.key && table->pos_in_table_list->jtbm_subselect)) // (5) { // Check full join - double rnd_records, records_after_filter, org_records; + double records_after_filter, org_records; + double records_best_filter; Range_rowid_filter_cost_info *filter= 0; double startup_cost= s->startup_cost; const char *scan_type= ""; + enum join_type type; + uint forced_index= MAX_KEY; /* Range optimizer never proposes a RANGE if it isn't better @@ -8832,7 +8877,8 @@ best_access_path(JOIN *join, This is done to make records found comparable to what we get with 'ref' access. 
*/ - org_records= records_after_filter= rnd_records= rows2double(s->found_records); + org_records= records_after_filter= rows2double(s->found_records); + records_best_filter= org_records; if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) { @@ -8850,11 +8896,13 @@ best_access_path(JOIN *join, range->cost / s->quick->read_time >= 0.9999999)); filter= - table->best_range_rowid_filter_for_partial_join(key_no, rows2double(range->rows), + table->best_range_rowid_filter_for_partial_join(key_no, + rows2double(range->rows), range->find_cost, range->index_only_cost, record_count, - &records_out); + &records_best_filter); + set_if_smaller(best.records_out, records_best_filter); if (filter) { double filter_cost= range->fetch_cost; @@ -8883,20 +8931,18 @@ best_access_path(JOIN *join, { type= JT_INDEX_MERGE; } - set_if_smaller(records_out, records_after_filter); loose_scan_opt.check_range_access(join, idx, s->quick); } else { /* We will now calculate cost of scan, with or without join buffer */ - rnd_records= matching_candidates_in_table(s, found_constraint, - use_cond_selectivity); - records_after_filter= rnd_records; - set_if_smaller(records_out, rnd_records); + records_after_filter= matching_candidates_in_table(s, 0, + use_cond_selectivity); + DBUG_ASSERT(records_after_filter <= s->records); - org_records= rows2double(s->records); + set_if_smaller(best.records_out, records_after_filter); - DBUG_ASSERT(rnd_records <= s->records); + org_records= rows2double(s->records); /* Estimate cost of reading table. */ if (s->cached_forced_index_type) @@ -8907,7 +8953,7 @@ best_access_path(JOIN *join, } else { - if (table->force_index_join && !best_key) + if (table->force_index_join && !best.key) { /* The query is using 'forced_index' and we did not find a usable key. 
@@ -8951,6 +8997,7 @@ best_access_path(JOIN *join, tmp= s->cached_scan_and_compare_time; type= JT_ALL; } + /* Cache result for other calls */ s->cached_forced_index_type= type; s->cached_forced_index_cost= tmp; s->cached_forced_index= forced_index; @@ -8977,7 +9024,7 @@ best_access_path(JOIN *join, else { /* Scan trough join cache */ - double cmp_time, refills; + double cmp_time, row_copy_cost, refills; /* Calculate cost of checking the the WHERE for this table. @@ -8995,13 +9042,16 @@ best_access_path(JOIN *join, /* We come here only if there are already rows in the join cache */ DBUG_ASSERT(idx != join->const_tables); /* - Cost of moving each row from each previous table from the join cache - to it's table record and comparing it with the found and accepted - row. + Cost of: + - Copying all previous record combinations to the join cache + - Copying the tables from the join cache to table records + - Checking the WHERE against the final row combination */ - cmp_time= (rnd_records * record_count * - (ROW_COPY_COST_THD(thd) * (idx - join->const_tables) * - JOIN_CACHE_ROW_COPY_COST_FACTOR + + row_copy_cost= (ROW_COPY_COST_THD(thd) * + JOIN_CACHE_ROW_COPY_COST_FACTOR(thd)); + cmp_time= (record_count * row_copy_cost + + records_after_filter * record_count * + ((idx - join->const_tables) * row_copy_cost + WHERE_COST_THD(thd))); tmp= COST_ADD(tmp, cmp_time); } @@ -9017,10 +9067,10 @@ best_access_path(JOIN *join, trace_access_scan. add("access_type", type == JT_ALL ? scan_type : join_type_str[type]). - add("rows", org_records). - add("rows_after_scan", rnd_records). - add("rows_after_filter", records_after_filter). - add("cost", tmp); + add("rows", org_records). + add("rows_after_filter", records_after_filter). + add("rows_out", best.records_out). 
+ add("cost", tmp); if (type == JT_ALL) { trace_access_scan.add("index_only", @@ -9028,27 +9078,38 @@ best_access_path(JOIN *join, } } - if (tmp + COST_EPS < best_cost) + if (tmp + COST_EPS < best.cost) { /* If the table has a range (s->quick is set) make_join_select() will ensure that this will be used */ - best_cost= tmp; - records= rnd_records; - best_key= 0; - best_forced_index= forced_index; + best.cost= tmp; + best.records_read= org_records; // Records accessed + best.records= records_after_filter; // Records to be checked with WHERE + /* + If we are using 'use_cond_selectivity > 1' then + table_after_join_selectivity may take into account other + filters that what is currently used so we have to use + records_after_filter. If 'use_cond_selectivity <= 1 then we + can use information from the best filter. + */ + best.records_after_filter= ((use_cond_selectivity > 1) ? + records_after_filter : + records_best_filter); + best.key= 0; + best.forced_index= forced_index; /* filter is only set if s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE */ - best_filter= filter; + best.filter= filter; /* range/index_merge/ALL/index access method are "independent", so: */ - best_ref_depends_map= 0; - best_uses_jbuf= MY_TEST(!disable_jbuf && !((table->map & + best.ref_depends_map= 0; + best.uses_jbuf= MY_TEST(!disable_jbuf && !((table->map & join->outer_join))); - spl_plan= 0; - best_type= type; + best.spl_plan= 0; + best.type= type; trace_access_scan.add("chosen", true); } else @@ -9063,29 +9124,33 @@ best_access_path(JOIN *join, add("cause", "cost"); } + crash_if_first_double_is_bigger(best.records_out, best.records); + crash_if_first_double_is_bigger(best.records_out, best.records_read); + /* Update the cost information for the current partial plan */ - crash_if_first_double_is_bigger(records_out, records); - pos->records_read= records; - pos->records_out= records_out; - pos->read_time= best_cost; - pos->key= best_key; - pos->forced_index= best_forced_index; - 
pos->type= best_type; + pos->records_init= best.records_read; + pos->records_after_filter= best.records_after_filter; + pos->records_read= best.records; + pos->records_out= best.records_out; + pos->read_time= best.cost; + pos->key= best.key; + pos->forced_index= best.forced_index; + pos->type= best.type; pos->table= s; - pos->ref_depend_map= best_ref_depends_map; + pos->ref_depend_map= best.ref_depends_map; pos->loosescan_picker.loosescan_key= MAX_KEY; - pos->use_join_buffer= best_uses_jbuf; - pos->spl_plan= spl_plan; - pos->range_rowid_filter_info= best_filter; - pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 : + pos->use_join_buffer= best.uses_jbuf; + pos->spl_plan= best.spl_plan; + pos->range_rowid_filter_info= best.filter; + pos->key_dependent= (best.type == JT_EQ_REF ? (table_map) 0 : key_dependent & remaining_tables); loose_scan_opt.save_to_position(s, loose_scan_pos); - if (!best_key && - idx == join->const_tables && + if (!best.key && + idx == join->const_tables && // First table table == join->sort_by_table && - join->unit->lim.get_select_limit() >= records) + join->unit->lim.get_select_limit() >= best.records) // QQQ Why? { trace_access_scan.add("use_tmp_table", true); join->sort_by_table= (TABLE*) 1; // Must use temporary table @@ -9320,15 +9385,6 @@ choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest) DBUG_RETURN(TRUE); } - /* - Store the cost of this query into a user variable - Don't update last_query_cost for statements that are not "flat joins" : - i.e. they have subqueries, unions or call stored procedures. - TODO: calculate a correct cost for a query with subqueries and UNIONs. 
- */ - if (join->thd->lex->is_single_level_stmt()) - join->thd->status_var.last_query_cost= join->best_read; - join->emb_sjm_nest= 0; DBUG_RETURN(FALSE); } @@ -9595,6 +9651,8 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) { POSITION *position= join->positions + idx; Json_writer_object trace_one_table(thd); + double original_record_count, current_record_count; + if (unlikely(thd->trace_started())) trace_plan_prefix(join, idx, remaining_tables); /* Find the best access method from 's' to the current partial plan */ @@ -9603,22 +9661,71 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) position, &loose_scan_pos); /* Compute the cost of the new plan extended with 's' */ - record_count= COST_MULT(record_count, position->records_read); + current_record_count= COST_MULT(record_count, position->records_out); read_time= COST_ADD(read_time, position->read_time); - optimize_semi_joins(join, remaining_tables, idx, &record_count, &read_time, - &loose_scan_pos); + original_record_count= current_record_count; + optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count, + &read_time, &loose_scan_pos); + if (position->sj_strategy != SJ_OPT_NONE && original_record_count) + { + /* Adjust records_out to contain the final number of rows */ + double ratio= current_record_count / original_record_count; + /* QQQ This is just to stop an assert later */ + if (ratio < 1) + position->records_out*= ratio; + } + remaining_tables&= ~(s->table->map); - double pushdown_cond_selectivity= 1.0; - if (use_cond_selectivity > 1) + if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE) + { + double pushdown_cond_selectivity, records_out; pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s, - remaining_tables); - position->cond_selectivity= pushdown_cond_selectivity; + remaining_tables, + &records_out); + if (unlikely(thd->trace_started()) && + pushdown_cond_selectivity != 1.0) + { + trace_one_table. 
+ add("pushdown_cond_selectivity", pushdown_cond_selectivity). + add("rows_out", records_out); + } + position->cond_selectivity= pushdown_cond_selectivity; + position->records_out= records_out; + current_record_count= COST_MULT(record_count, records_out); + } + else + position->cond_selectivity= 1.0; ++idx; + record_count= current_record_count; } if (join->sort_by_table && join->sort_by_table != join->positions[join->const_tables].table->table) - read_time+= record_count; // We have to make a temp table + { + /* + We may have to make a temp table, note that this is only a + heuristic since we cannot know for sure at this point if we + we are going to use addon fields or to have flush sorting to + disk. We also don't know the temporary table will be in memory + or disk. + The following calculation takes a middle ground where assume + we can sort the keys in memory but have to use a disk based + temporary table to retrive the rows. + This cost is probably much bigger than it has to be... + */ + double sort_cost; + sort_cost= (get_qsort_sort_cost((ha_rows)record_count, 0) + + record_count * + DISK_TEMPTABLE_LOOKUP_COST(thd)); + { + if (unlikely(thd->trace_started())) + { + Json_writer_object trace_one_table(thd); + trace_one_table.add("estimated_cost_for_sorting", sort_cost); + } + } + read_time= COST_ADD(read_time, sort_cost); + } memcpy((uchar*) join->best_positions, (uchar*) join->positions, sizeof(POSITION)*idx); join->join_record_count= record_count; @@ -9997,8 +10104,7 @@ double JOIN::get_examined_rows() COST_MULT((double) (tab->get_examined_rows()), prev_fanout)); prev_tab= tab; } - examined_rows= (double) - (records > (double) HA_ROWS_MAX ? 
HA_ROWS_MAX : (ha_rows) records); + examined_rows= double_to_rows(records); return examined_rows; } @@ -10129,9 +10235,10 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, @brief Get the selectivity of conditions when joining a table - @param join The optimized join - @param s The table to be joined for evaluation - @param rem_tables The bitmap of tables to be joined later + @param join The optimized join + @param s The table to be joined for evaluation + @param rem_tables The bitmap of tables to be joined later + @param new_records_out OUT Set to number of rows accepted @detail Get selectivity of conditions that can be applied when joining this table @@ -10145,12 +10252,14 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, condition, "COND(this_table) AND COND(this_table, previous_tables)". @retval - selectivity of the conditions imposed on the rows of s + selectivity of the conditions imposed on the rows of s related to + the rows that we are expected to read (position->records_init). */ static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, - table_map rem_tables) + table_map rem_tables, + double *new_records_out) { uint16 ref_keyuse_steps_buf[MAX_REF_PARTS]; uint ref_keyuse_size= MAX_REF_PARTS; @@ -10158,13 +10267,14 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, Field *field; TABLE *table= s->table; MY_BITMAP *read_set= table->read_set; - double sel= table->cond_selectivity; POSITION *pos= &join->positions[idx]; + double sel, records_out= pos->records_out; uint keyparts= 0; uint found_part_ref_or_null= 0; if (pos->key != 0) { + sel= table->cond_selectivity; /* A ref access or hash join is used for this table. ref access is created from @@ -10338,35 +10448,22 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, } keyuse++; } - } - else - { /* - The table is accessed with full table scan, or quick select. 
- Selectivity of COND(table) is already accounted for in - matching_candidates_in_table(). - */ - sel= 1.0; - } + If the field f from the table is equal to a field from one the + earlier joined tables then the selectivity of the range conditions + over the field f must be discounted. - /* - If the field f from the table is equal to a field from one the - earlier joined tables then the selectivity of the range conditions - over the field f must be discounted. - - We need to discount selectivity only if we're using ref-based - access method (and have sel!=1). - If we use ALL/range/index_merge, then sel==1, and no need to discount. - */ - if (pos->key != NULL) - { + We need to discount selectivity only if we're using ref-based + access method (and have sel!=1). + If we use ALL/range/index_merge, then sel==1, and no need to discount. + */ for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++) { if (!bitmap_is_set(read_set, field->field_index) || !field->next_equal_field) - continue; - for (Field *next_field= field->next_equal_field; - next_field != field; + continue; + for (Field *next_field= field->next_equal_field; + next_field != field; next_field= next_field->next_equal_field) { if (!(next_field->table->map & rem_tables) && @@ -10381,14 +10478,39 @@ double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s, } } } + /* + We have now calculated a more exact 'records_out' taking more index + costs into account. + pos->records_out previously contained the smallest record count for + all range or ref access, which should not be smaller than what we + calculated above. + */ + records_out= pos->records_after_filter * sel; + set_if_smaller(records_out, pos->records_out); } - sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, + sel= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, keyparts, ref_keyuse_steps); + records_out*= sel; + + /* + Update sel to be relative pos->records_read as that is what some old + code expects. 
Newer code should just use 'position->records_out' instead. + */ + if (pos->records_read == 0) + sel= 1.0; + else + { + sel= records_out / pos->records_read; + DBUG_ASSERT(sel >= 0.0 and sel <= 1.00001); + if (sel > 1.0) + sel= 1.0; + } + exit: + *new_records_out= records_out; if (ref_keyuse_steps != ref_keyuse_steps_buf) my_free(ref_keyuse_steps); - DBUG_ASSERT(sel >= 0.0 and sel <= 1.0); return sel; } @@ -10407,7 +10529,7 @@ check_if_edge_table(POSITION *pos, if ((pos->type == JT_EQ_REF || (pos->type == JT_REF && - pos->records_read == 1 && + pos->records_init == 1 && !pos->range_rowid_filter_info)) && pushdown_cond_selectivity >= 0.999) return SEARCH_FOUND_EDGE; @@ -10600,7 +10722,7 @@ get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx, // pplan_cost already too great, stop search continue; - pplan= expand pplan by best_access_method; + pplan= expand plan by best_access_method; remaining_tables= remaining_tables - table T; if (remaining_tables is not an empty set and @@ -10671,8 +10793,8 @@ best_extension_by_limited_search(JOIN *join, { THD *thd= join->thd; /* - 'join' is a partial plan with lower cost than the best plan so far, - so continue expanding it further with the tables in 'remaining_tables'. + 'join' is a partial plan with lower cost than the best plan so far, + so continue expanding it further with the tables in 'remaining_tables'. 
*/ JOIN_TAB *s; double best_record_count= DBL_MAX; @@ -10689,14 +10811,14 @@ best_extension_by_limited_search(JOIN *join, if (dbug_user_var_equals_int(thd, "show_explain_probe_select_id", join->select_lex->select_number)) - dbug_serve_apcs(thd, 1); - ); + dbug_serve_apcs(thd, 1); + ); if (unlikely(thd->check_killed())) // Abort DBUG_RETURN(SEARCH_ABORT); DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time, - "part_plan");); + "part_plan");); status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls); if (join->emb_sjm_nest) @@ -10785,7 +10907,7 @@ best_extension_by_limited_search(JOIN *join, !check_interleaving_with_nj(s)) { table_map real_table_bit= s->table->map; - double current_record_count, current_read_time; + double current_record_count, current_read_time, original_record_count; double partial_join_cardinality; POSITION *position= join->positions + idx, *loose_scan_pos; double pushdown_cond_selectivity; @@ -10802,7 +10924,7 @@ best_extension_by_limited_search(JOIN *join, loose_scan_pos= pos->position+1; /* Compute the cost of the new plan extended with 's' */ - current_record_count= COST_MULT(record_count, position->records_read); + current_record_count= COST_MULT(record_count, position->records_out); current_read_time= COST_ADD(read_time, position->read_time); if (unlikely(trace_one_table.trace_started())) @@ -10811,9 +10933,22 @@ best_extension_by_limited_search(JOIN *join, add("rows_for_plan", current_record_count). 
add("cost_for_plan", current_read_time); } + original_record_count= current_record_count; optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count, ¤t_read_time, loose_scan_pos); - + if (position->sj_strategy != SJ_OPT_NONE) + { + /* Adjust records_out and current_record_count after semi join */ + double ratio= current_record_count / original_record_count; + /* QQQ This is just to stop an assert later */ + if (ratio < 1.0) + position->records_out*= ratio; + if (unlikely(trace_one_table.trace_started())) + { + trace_one_table.add("sj_rows_out", position->records_out); + trace_one_table.add("sj_rows_for_plan", current_record_count); + } + } /* Expand only partial plans with lower cost than the best QEP so far */ if (current_read_time + COST_EPS >= join->best_read) { @@ -10864,15 +10999,15 @@ best_extension_by_limited_search(JOIN *join, if (best_record_count > current_record_count || best_read_time > current_read_time || (idx == join->const_tables && // 's' is the first table in the QEP - s->table == join->sort_by_table)) + s->table == join->sort_by_table)) { /* Store the current record count and cost as the best possible cost at this level if the following holds: - It's the lowest record number and cost so far - - There is no remaing table that could improve index usage - or we found an EQ_REF or REF key with less than 2 - matching records (good enough). + - There is no remaing table that could improve index usage + or we found an EQ_REF or REF key with less than 2 + matching records (good enough). */ if (best_record_count >= current_record_count && best_read_time >= current_read_time && @@ -10924,17 +11059,26 @@ best_extension_by_limited_search(JOIN *join, } pushdown_cond_selectivity= 1.0; - if (use_cond_selectivity > 1) + /* + TODO: When a semi-join strategy is applied (sj_strategy!=SJ_OPT_NONE), + we should account for selectivity from table_after_join_selectivity(). 
+ (Condition filtering is performed before the semi-join removes some + fanout so this might require moving the code around) + */ + if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE) + { pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s, - remaining_tables & ~real_table_bit); + remaining_tables & ~real_table_bit, + &position->records_out); + } join->positions[idx].cond_selectivity= pushdown_cond_selectivity; - partial_join_cardinality= (current_record_count * - pushdown_cond_selectivity); + partial_join_cardinality= record_count * position->records_out; - if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0) + if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0 && + partial_join_cardinality < current_record_count) trace_one_table .add("selectivity", pushdown_cond_selectivity) .add("estimated_join_cardinality", partial_join_cardinality); @@ -10979,11 +11123,21 @@ best_extension_by_limited_search(JOIN *join, { /* We may have to make a temp table, note that this is only a - heuristic since we cannot know for sure at this point. - Hence it may be wrong. + heuristic since we cannot know for sure at this point if we + we are going to use addon fields or to have flush sorting to + disk. We also don't know the temporary table will be in memory + or disk. + The following calculation takes a middle ground where assume + we can sort the keys in memory but have to use a disk based + temporary table to retrive the rows. + This cost is probably much bigger than it has to be... 
*/ - trace_one_table.add("cost_for_sorting", current_record_count); - current_read_time= COST_ADD(current_read_time, current_record_count); + double sort_cost; + sort_cost= (get_qsort_sort_cost((ha_rows)current_record_count,0) + + current_record_count * + DISK_TEMPTABLE_LOOKUP_COST(thd)); + trace_one_table.add("cost_for_sorting", sort_cost); + current_read_time= COST_ADD(current_read_time, sort_cost); } if (current_read_time < join->best_read) { @@ -11318,11 +11472,8 @@ prev_record_reads(const POSITION *positions, uint idx, table_map found_ref) is an inprecise estimate and adding 1 (or, in the worst case, #max_nested_outer_joins=64-1) will not make it any more precise. */ - if (pos->records_read) - { - found= COST_MULT(found, pos->records_read); - found*= pos->cond_selectivity; - } + if (pos->records_out) + found= COST_MULT(found, pos->records_out); } } return found; @@ -11752,7 +11903,7 @@ bool JOIN::get_best_combination() */ SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info; j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0); - j->records_out= j->records_read; + j->records_init= j->records_out= j->records_read; j->records= (ha_rows) j->records_read; j->cond_selectivity= 1.0; JOIN_TAB *jt; @@ -11787,6 +11938,7 @@ bool JOIN::get_best_combination() if (j->type == JT_SYSTEM) goto loop_end; + if (!(keyuse= cur_pos->key)) { if (cur_pos->type == JT_NEXT) // Forced index @@ -11807,17 +11959,19 @@ bool JOIN::get_best_combination() j->range_rowid_filter_info= cur_pos->range_rowid_filter_info; - loop_end: - /* + /* Save records_read in JOIN_TAB so that select_describe()/etc don't have to access join->best_positions[]. 
*/ + j->records_init= cur_pos->records_init; j->records_read= cur_pos->records_read; j->records_out= cur_pos->records_out; + + loop_end: j->cond_selectivity= cur_pos->cond_selectivity; DBUG_ASSERT(j->cond_selectivity <= 1.0); crash_if_first_double_is_bigger(j->records_out, - j->records_read * + j->records_init * (j->range_rowid_filter_info ? j->range_rowid_filter_info->selectivity : 1.0)); @@ -12580,7 +12734,10 @@ make_outerjoin_info(JOIN *join) { if (embedding->is_active_sjm()) { - /* We're trying to walk out of an SJ-Materialization nest. Don't do this. */ + /* + We're trying to walk out of an SJ-Materialization nest. + Don't do this. + */ break; } /* Ignore sj-nests: */ @@ -12861,8 +13018,10 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) tab->use_quick=1; tab->ref.key= -1; tab->ref.key_parts=0; // Don't use ref key. - join->best_positions[i].records_read= rows2double(tab->quick->records); - /* + join->best_positions[i].records_read= + join->best_positions[i].records_out= + rows2double(tab->quick->records); + /* We will use join cache here : prevent sorting of the first table only and sort at the end. */ @@ -14906,14 +15065,14 @@ void JOIN_TAB::cleanup() /** Estimate the time to get rows of the joined table - Updates found_records, records, cached_scan_time, cached_covering_key, - read_time and cache_scan_and_compare_time + Updates found_records, records, cached_covering_key, read_time and + cache_scan_and_compare_time */ void JOIN_TAB::estimate_scan_time() { THD *thd= join->thd; - double copy_cost= ROW_COPY_COST_THD(thd); + double copy_cost; cached_covering_key= MAX_KEY; if (table->is_created()) @@ -14924,6 +15083,7 @@ void JOIN_TAB::estimate_scan_time() &startup_cost); table->opt_range_condition_rows= records; table->used_stat_records= records; + copy_cost= table->file->ROW_COPY_COST; } else { @@ -14937,21 +15097,38 @@ void JOIN_TAB::estimate_scan_time() if (!table->covering_keys.is_clear_all() && ! 
table->no_keyread) { cached_covering_key= find_shortest_key(table, &table->covering_keys); - read_time= table->file->ha_key_scan_time(cached_covering_key); - copy_cost= KEY_COPY_COST_THD(thd); + read_time= table->file->ha_key_scan_time(cached_covering_key, records); + copy_cost= 0; // included in ha_key_scan_time } else - read_time= table->file->ha_scan_time(); + { + read_time= table->file->ha_scan_time(records); + copy_cost= 0; + } } } else { + /* + The following is same as calling + TABLE_SHARE::update_optimizer_costs, but without locks + */ + if (table->s->db_type() == heap_hton) + memcpy(&table->s->optimizer_costs, &heap_optimizer_costs, + sizeof(heap_optimizer_costs)); + else + memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs, + sizeof(tmp_table_optimizer_costs)); + table->file->set_optimizer_costs(thd); + table->s->optimizer_costs_inited=1 ; + records= table->stat_records(); DBUG_ASSERT(table->opt_range_condition_rows == records); - read_time= records ? (double) records: 10.0;// TODO:fix this stub + read_time= table->file->ha_scan_time(MY_MAX(records, 1000)); // Needs fix.. 
+ copy_cost= table->s->optimizer_costs.row_copy_cost; } + found_records= records; - cached_scan_time= read_time; cached_scan_and_compare_time= (read_time + records * (copy_cost + WHERE_COST_THD(thd))); } @@ -14996,7 +15173,7 @@ ha_rows JOIN_TAB::get_examined_rows() } } else - examined_rows= records_read; + examined_rows= records_init; if (examined_rows >= (double) HA_ROWS_MAX) return HA_ROWS_MAX; @@ -18496,7 +18673,7 @@ table_map JOIN::get_allowed_nj_tables(uint idx) first_alt TRUE <=> Use the LooseScan plan for the first_tab no_jbuf_before Don't allow to use join buffering before this table - reopt_rec_count OUT New output record count + outer_rec_count OUT New output record count reopt_cost OUT New join prefix cost DESCRIPTION @@ -18551,6 +18728,8 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; join->cur_sj_inner_tables= 0; + double inner_fanout= 1.0; + for (i= first_tab; i <= last_tab; i++) { JOIN_TAB *rs= join->positions[i].table; @@ -18563,31 +18742,43 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, join->positions, i, TRUE, rec_count, &pos, &loose_scan_pos); + if ((i == first_tab && first_alt)) + pos= loose_scan_pos; } else pos= join->positions[i]; - if ((i == first_tab && first_alt)) - pos= loose_scan_pos; - reopt_remaining_tables &= ~rs->table->map; - rec_count= COST_MULT(rec_count, pos.records_read); cost= COST_ADD(cost, pos.read_time); - //TODO: take into account join condition selectivity here - double pushdown_cond_selectivity= 1.0; - table_map real_table_bit= rs->table->map; - if (join->thd->variables.optimizer_use_condition_selectivity > 1) + + double records_out= pos.records_out; + /* + The (i != last_tab) is here to mimic what + best_extension_by_limited_search() does: do not call + table_after_join_selectivity() for the join_tab where the semi-join + strategy is applied + */ + if (i != last_tab && + 
join->thd->variables.optimizer_use_condition_selectivity > 1) { + table_map real_table_bit= rs->table->map; + double __attribute__((unused)) pushdown_cond_selectivity; pushdown_cond_selectivity= table_after_join_selectivity(join, i, rs, reopt_remaining_tables & - ~real_table_bit); + ~real_table_bit, &records_out); } - (*outer_rec_count) *= pushdown_cond_selectivity; - if (!rs->emb_sj_nest) - *outer_rec_count= COST_MULT(*outer_rec_count, pos.records_read); + rec_count= COST_MULT(rec_count, records_out); + *outer_rec_count= COST_MULT(*outer_rec_count, records_out); + if (rs->emb_sj_nest) + inner_fanout= COST_MULT(inner_fanout, records_out); } + + /* Discount the fanout produced by the subquery */ + if (inner_fanout > 1.0) + *outer_rec_count /= inner_fanout; + join->cur_sj_inner_tables= save_cur_sj_inner_tables; *reopt_cost= cost; @@ -20828,7 +21019,7 @@ TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM *param, { TABLE *table; Create_tmp_table maker((ORDER *) NULL, false, false, - select_options, HA_POS_ERROR); + select_options, HA_ROWS_MAX); if (!(table= maker.start(thd, param, &table_alias)) || maker.add_schema_fields(thd, table, param, schema_table) || maker.finalize(thd, table, param, do_not_open, keep_row_order)) @@ -21008,7 +21199,6 @@ bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value) return false; } - bool open_tmp_table(TABLE *table) { int error; @@ -21022,6 +21212,7 @@ bool open_tmp_table(TABLE *table) } table->db_stat= HA_OPEN_KEYFILE; (void) table->file->extra(HA_EXTRA_QUICK); /* Faster */ + table->file->set_optimizer_costs(table->in_use); if (!table->is_created()) { table->set_created(); @@ -24702,31 +24893,40 @@ ok: @return MAX_KEY no suitable key found key index otherwise + + @notes + We should not use keyread_time() as in the case of disk_read_cost= 0 + all keys would be regarded equal. 
*/ uint find_shortest_key(TABLE *table, const key_map *usable_keys) { - double min_cost= DBL_MAX; + size_t min_length= INT_MAX32; uint best= MAX_KEY; - if (!usable_keys->is_clear_all()) + uint possible_keys= usable_keys->bits_set(); + + if (possible_keys) { + if (possible_keys == 1) + return usable_keys->find_first_bit(); + for (uint nr=0; nr < table->s->keys ; nr++) { if (usable_keys->is_set(nr)) { - double cost= table->file->ha_key_scan_time(nr); - if (cost < min_cost) + size_t length= table->key_storage_length(nr); + if (length < min_length) { - min_cost= cost; - best=nr; + min_length= length; + best= nr; } - DBUG_ASSERT(best < MAX_KEY); } } } return best; } + /** Test if a second key is the subkey of the first one. @@ -28244,6 +28444,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, // psergey-todo: data for filtering! tracker= &eta->tracker; jbuf_tracker= &eta->jbuf_tracker; + jbuf_unpack_tracker= &eta->jbuf_unpack_tracker; /* Enable the table access time tracker only for "ANALYZE stmt" */ if (thd->lex->analyze_stmt) @@ -28472,12 +28673,13 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, ha_rows examined_rows= get_examined_rows(); eta->rows_set= true; - eta->rows= examined_rows; + eta->rows= double_to_rows(examined_rows); /* "filtered" */ float f= 0.0; if (examined_rows) { +#ifdef OLD_CODE // QQQ double pushdown_cond_selectivity= cond_selectivity; if (pushdown_cond_selectivity != 1.0) f= (float) (100.0 * pushdown_cond_selectivity); @@ -28485,6 +28687,9 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, f= (float) (100.0 * range_rowid_filter_info->selectivity); else f= (float) (100.0 * records_read / examined_rows); +#else + f= (float) (100.0 * records_out / examined_rows); +#endif } set_if_smaller(f, 100.0); eta->filtered_set= true; @@ -28880,9 +29085,9 @@ int JOIN::save_explain_data_intern(Explain_query *output, continue; } - Explain_table_access *eta= (new (output->mem_root) - Explain_table_access(output->mem_root)); 
+ Explain_table_access(output->mem_root, + thd->lex->analyze_stmt)); if (!eta) DBUG_RETURN(1); @@ -29922,7 +30127,7 @@ void JOIN::cache_const_exprs() - If there is no quick select return the full cost from cost_for_index_read() (Doing a full scan with up to 'limit' records) - @param pos Result from best_acccess_path(). Is NULL for + @param pos Result from best_access_path(). Is NULL for single-table UPDATE/DELETE @param table Table to be sorted @param keynr Which index to use @@ -30008,7 +30213,7 @@ static bool get_range_limit_read_cost(const POSITION *pos, /* Calculate the number of rows we have to check if we are - doing a full index scan (as a suitabe range scan was not available). + doing a full index scan (as a suitable range scan was not available). We assume that each of the tested indexes is not correlated with ref_key. Thus, to select first N records we have to scan @@ -30197,12 +30402,12 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table, trace_cheaper_ordering.add_table_name(tab); else trace_cheaper_ordering.add_table_name(table); - trace_cheaper_ordering - .add("rows_estimation", rows_estimate) - .add("read_cost", read_time) - .add("filesort_cost", filesort_cost) - .add("filesort_type", filesort_names[filesort_type].str) - .add("fanout", fanout); + trace_cheaper_ordering. + add("rows_estimation", rows_estimate). + add("filesort_cost", filesort_cost). + add("read_cost", read_time). + add("filesort_type", filesort_names[filesort_type].str). 
+ add("fanout", fanout); } Json_writer_array possible_keys(thd,"possible_keys"); diff --git a/sql/sql_select.h b/sql/sql_select.h index 536f828ee1d..45bf4ba5fe8 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -309,6 +309,7 @@ typedef struct st_join_table { Table_access_tracker *tracker; Table_access_tracker *jbuf_tracker; + Time_and_counter_tracker *jbuf_unpack_tracker; // READ_RECORD::Setup_func materialize_table; READ_RECORD::Setup_func read_first_record; @@ -341,6 +342,9 @@ typedef struct st_join_table { */ double read_time; + /* Copy of POSITION::records_init, set by get_best_combination() */ + double records_init; + /* Copy of POSITION::records_read, set by get_best_combination() */ double records_read; @@ -356,7 +360,6 @@ typedef struct st_join_table { double partial_join_cardinality; /* set by estimate_scan_time() */ - double cached_scan_time; double cached_scan_and_compare_time; double cached_forced_index_cost; @@ -959,21 +962,44 @@ public: /* The table that's put into join order */ JOIN_TAB *table; + /* number of rows that will be read from the table */ + double records_init; + + /* + Number of rows left after filtering, calculated in best_access_path() + In case of use_cond_selectivity > 1 it contains rows after the used + rowid filter (if such one exists). + If use_cond_selectivity <= 1 it contains the minimum rows of any + rowid filtering or records_init if no filter exists. + */ + double records_after_filter; + /* - The number of rows that will be read from the table + Number of expected rows before applying the full WHERE clause. This + includes rowid filter and table->cond_selectivity if + use_cond_selectivity > 1. See matching_candidates_in_table(). + Should normally not be used. */ double records_read; /* - The "fanout": number of output rows that will be produced (after + The number of rows after applying the WHERE clause. 
+ + Same as the "fanout": number of output rows that will be produced (after pushed down selection condition is applied) per each row combination of previous tables. - This takes into account table->cond_selectivity, the WHERE clause - related to this table calculated in - calculate_cond_selectivity_for_table(), and the used rowid filter but - does not take into account the WHERE clause involving preceding tables - calculated in table_after_join_selectivity(). + In best_access_path() it is set to the minimum number of accepted rows + for any possible access method or filter: + + records_out takes into account table->cond_selectivity, the WHERE clause + related to this table calculated in calculate_cond_selectivity_for_table(), + and the used rowid filter. + + After best_access_path(), records_out does not yet take into + account the part of the WHERE clause involving preceding tables. + records_out is updated in best_extension_by_limited_search() to take these + tables into account by calling table_after_join_selectivity(). 
*/ double records_out; diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 87f697c5ae9..d94bd127311 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -50,6 +50,7 @@ #include "authors.h" #include "contributors.h" #include "sql_partition.h" +#include "optimizer_defaults.h" #ifdef HAVE_EVENT_SCHEDULER #include "events.h" #include "event_data_objects.h" @@ -3668,6 +3669,9 @@ const char* get_one_variable(THD *thd, /* 6 is the default precision for '%f' in sprintf() */ end= buff + my_fcvt(*value.as_double, 6, buff, NULL); break; + case SHOW_OPTIMIZER_COST: // Stored in 1ms, displayed in us + end= buff + my_fcvt(*value.as_double*1000, 6, buff, NULL); + break; case SHOW_LONG_STATUS: value.as_char= status_var_value.as_char + value.as_intptr; /* fall through */ @@ -9188,6 +9192,49 @@ int fill_key_cache_tables(THD *thd, TABLE_LIST *tables, COND *cond) } +/* Ensure we return 'OPTIMIZER_COST_UNDEF' if cost < 0 */ + +static double fix_cost(double cost) +{ + return cost < 0 ? OPTIMIZER_COST_UNDEF : cost; +} + +static int run_fill_optimizer_costs_tables(const LEX_CSTRING *name, + const OPTIMIZER_COSTS *costs, + TABLE *table) +{ + THD *thd= table->in_use; + DBUG_ENTER("run_fill_optimizer_costs_tables"); + + restore_record(table, s->default_values); + table->field[0]->store(name->str, name->length, system_charset_info); + table->field[1]->store(fix_cost(costs->disk_read_cost*1000.0)); + table->field[2]->store(fix_cost(costs->index_block_copy_cost*1000.0)); + table->field[3]->store(fix_cost(costs->key_cmp_cost*1000.0)); + table->field[4]->store(fix_cost(costs->key_copy_cost*1000.0)); + table->field[5]->store(fix_cost(costs->key_lookup_cost*1000.0)); + table->field[6]->store(fix_cost(costs->key_next_find_cost*1000.0)); + table->field[7]->store(fix_cost(costs->disk_read_ratio)); + table->field[8]->store(fix_cost(costs->row_copy_cost*1000.0)); + table->field[9]->store(fix_cost(costs->row_lookup_cost*1000.0)); + table->field[10]->store(fix_cost(costs->row_next_find_cost*1000.0)); + 
table->field[11]->store(fix_cost(costs->rowid_cmp_cost*1000.0)); + table->field[12]->store(fix_cost(costs->rowid_copy_cost*1000.0)); + + DBUG_RETURN(schema_table_store_record(thd, table)); +} + + +int fill_optimizer_costs_tables(THD *thd, TABLE_LIST *tables, COND *cond) +{ + DBUG_ENTER("fill_optimizer_costs_tables"); + + int res= process_optimizer_costs(run_fill_optimizer_costs_tables, + tables->table); + DBUG_RETURN(res); +} + + namespace Show { ST_FIELD_INFO schema_fields_info[]= @@ -9816,6 +9863,25 @@ ST_FIELD_INFO keycache_fields_info[]= }; +ST_FIELD_INFO optimizer_costs_fields_info[]= +{ + Column("ENGINE", Varchar(NAME_LEN),NOT_NULL), + Column("OPTIMIZER_DISK_READ_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_INDEX_BLOCK_COPY_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_KEY_COMPARE_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_KEY_COPY_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_KEY_LOOKUP_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_KEY_NEXT_FIND_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_DISK_READ_RATIO", Decimal(906), NOT_NULL), + Column("OPTIMIZER_ROW_COPY_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_ROW_LOOKUP_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_ROW_NEXT_FIND_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_ROWID_COMPARE_COST", Decimal(906), NOT_NULL), + Column("OPTIMIZER_ROWID_COPY_COST", Decimal(906), NOT_NULL), + CEnd() +}; + + ST_FIELD_INFO show_explain_tabular_fields_info[]= { Column("id", SLonglong(3), NULLABLE, "id"), @@ -9954,6 +10020,8 @@ ST_SCHEMA_TABLE schema_tables[]= OPTIMIZE_I_S_TABLE|OPEN_TABLE_ONLY}, {"OPEN_TABLES", Show::open_tables_fields_info, 0, fill_open_tables, make_old_format, 0, -1, -1, 1, 0}, + {"OPTIMIZER_COSTS", Show::optimizer_costs_fields_info, 0, + fill_optimizer_costs_tables, 0, 0, -1,-1, 0, 0}, {"OPTIMIZER_TRACE", Show::optimizer_trace_info, 0, fill_optimizer_trace_info, NULL, NULL, -1, -1, false, 0}, {"PARAMETERS", Show::parameters_fields_info, 0, diff 
--git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 0b09d52e217..d4fe31b25f1 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -8271,7 +8271,7 @@ assign_to_keycache_parts: key_cache_name: ident { $$= $1; } - | DEFAULT { $$ = default_key_cache_base; } + | DEFAULT { $$ = default_base; } ; preload: diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 894d2bede28..d5146026692 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -53,8 +53,9 @@ #include "debug_sync.h" // DEBUG_SYNC #include "sql_show.h" #include "opt_trace_context.h" - #include "log_event.h" +#include "optimizer_defaults.h" + #ifdef WITH_PERFSCHEMA_STORAGE_ENGINE #include "../storage/perfschema/pfs_server.h" #endif /* WITH_PERFSCHEMA_STORAGE_ENGINE */ @@ -6973,68 +6974,111 @@ static Sys_var_ulong Sys_optimizer_max_sel_arg_weight( SESSION_VAR(optimizer_max_sel_arg_weight), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, ULONG_MAX), DEFAULT(SEL_ARG::MAX_WEIGHT), BLOCK_SIZE(1)); -/* - We don't allow 100 for optimizer_cache_cost as there is always a small - cost of finding the key, on cached pages, that we have to take into account. -*/ -static bool update_optimizer_cache_hit_ratio(sys_var *self, THD *thd, - enum_var_type type) -{ - if (type == OPT_SESSION) - thd->optimizer_cache_hit_ratio= - cache_hit_ratio(thd->variables.optimizer_cache_hit_ratio); - return 0; -} - -static Sys_var_uint Sys_optimizer_cache_hit_ratio( - "optimizer_cache_hit_ratio", - "Expected hit rate of the row and index cache in storage engines. 
" - "The value should be an integer between 0 and 99, where 0 means cache is " - "empty and 99 means that value is almost always in the cache.", - SESSION_VAR(optimizer_cache_hit_ratio), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(0, 99), DEFAULT(DEFAULT_CACHE_HIT_RATIO), 1, NO_MUTEX_GUARD, - NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(update_optimizer_cache_hit_ratio)); - -static Sys_var_double Sys_optimizer_key_copy_cost( +static Sys_var_engine_optimizer_cost Sys_optimizer_disk_read_ratio( + "optimizer_disk_read_ratio", + "Chance that we have to do a disk read to find a row or index entry from " + "the engine cache (cache_misses/total_cache_requests). 0.0 means that " + "everything is cached and 1.0 means that nothing is expected to be in the " + "engine cache.", + COST_VAR(disk_read_ratio), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_DISK_READ_RATIO), + VALID_RANGE(0.0, 1.0), DEFAULT(DEFAULT_DISK_READ_RATIO), COST_ADJUST(1)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_key_lookup_cost( + "optimizer_key_lookup_cost", + "Cost for finding a key based on a key value", + COST_VAR(key_lookup_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_LOOKUP_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_KEY_LOOKUP_COST), COST_ADJUST(1000)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_row_lookup_cost( + "optimizer_row_lookup_cost", + "Cost of finding a row based on a rowid or a clustered key.", + COST_VAR(row_lookup_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROW_LOOKUP_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROW_LOOKUP_COST), COST_ADJUST(1000)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_disk_read_cost( + "optimizer_disk_read_cost", + "Cost of reading a block of IO_SIZE (4096) from a disk (in usec).", + COST_VAR(disk_read_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_DISK_READ_COST), + VALID_RANGE(0, 10000), DEFAULT(DEFAULT_DISK_READ_COST), COST_ADJUST(1000)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_key_copy_cost( "optimizer_key_copy_cost", - "Cost of 
finding the next key in the engine and copying it to the SQL layer.", - SESSION_VAR(optimizer_key_copy_cost), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(0, 1), DEFAULT(DEFAULT_KEY_COPY_COST), NO_MUTEX_GUARD, - NOT_IN_BINLOG); + "Cost of finding the next key in the engine and copying it to the SQL " + "layer.", + COST_VAR(key_copy_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_COPY_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_KEY_COPY_COST), COST_ADJUST(1000)); -static Sys_var_double Sys_optimizer_index_block_copy_cost( +static Sys_var_engine_optimizer_cost Sys_optimizer_index_block_copy_cost( "optimizer_index_block_copy_cost", - "Cost of copying a key block from the cache to intern storage as part of an " - "index scan.", - SESSION_VAR(optimizer_index_block_copy_cost), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(0, 1), DEFAULT(DEFAULT_INDEX_BLOCK_COPY_COST), NO_MUTEX_GUARD, - NOT_IN_BINLOG); - -static Sys_var_double Sys_optimizer_key_next_find_cost( + "Cost of copying a key block from the cache to intern storage as part of " + "an index scan.", + COST_VAR(index_block_copy_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_INDEX_BLOCK_COPY_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_INDEX_BLOCK_COPY_COST), COST_ADJUST(1000)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_row_next_find_cost( + "optimizer_row_next_find_cost", + "Cost of finding the next row when scanning the table.", + COST_VAR(row_next_find_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROW_NEXT_FIND_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROW_NEXT_FIND_COST), COST_ADJUST(1000)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_key_next_find_cost( "optimizer_key_next_find_cost", "Cost of finding the next key and rowid when using filters.", - SESSION_VAR(optimizer_key_next_find_cost), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(0, 1), DEFAULT(DEFAULT_KEY_NEXT_FIND_COST), NO_MUTEX_GUARD, - NOT_IN_BINLOG); + COST_VAR(key_next_find_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_NEXT_FIND_COST), + VALID_RANGE(0, 
1000), DEFAULT(DEFAULT_KEY_NEXT_FIND_COST), COST_ADJUST(1000)); -static Sys_var_double Sys_optimizer_row_copy_cost( +static Sys_var_engine_optimizer_cost Sys_optimizer_row_copy_cost( "optimizer_row_copy_cost", "Cost of copying a row from the engine or the join cache to the SQL layer.", - SESSION_VAR(optimizer_row_copy_cost), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(0, 1), DEFAULT(DEFAULT_ROW_COPY_COST), NO_MUTEX_GUARD, - NOT_IN_BINLOG); + COST_VAR(row_copy_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROW_COPY_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROW_COPY_COST), COST_ADJUST(1000)); -static Sys_var_double Sys_optimizer_where_cost( - "optimizer_where_cost", - "Cost of checking the row against the WHERE clause.", - SESSION_VAR(optimizer_where_cost), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(0, 1), DEFAULT(DEFAULT_WHERE_COST), NO_MUTEX_GUARD, - NOT_IN_BINLOG); - -static Sys_var_double Sys_optimizer_key_cmp_cost( +static Sys_var_engine_optimizer_cost Sys_optimizer_key_cmp_cost( "optimizer_key_compare_cost", "Cost of checking a key against the end key condition.", - SESSION_VAR(optimizer_key_cmp_cost), CMD_LINE(REQUIRED_ARG), - VALID_RANGE(0, 1), DEFAULT(DEFAULT_KEY_COMPARE_COST), NO_MUTEX_GUARD, - NOT_IN_BINLOG); + COST_VAR(key_cmp_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_KEY_CMP_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_KEY_COMPARE_COST), COST_ADJUST(1000)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_rowid_cmp_cost( + "optimizer_rowid_compare_cost", + "Cost of comparing two rowid's", + COST_VAR(rowid_cmp_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROWID_CMP_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROWID_COMPARE_COST), COST_ADJUST(1000)); + +static Sys_var_engine_optimizer_cost Sys_optimizer_rowid_copy_cost( + "optimizer_rowid_copy_cost", + "Cost of copying a rowid", + COST_VAR(rowid_copy_cost), + CMD_LINE(REQUIRED_ARG, OPT_COSTS_ROWID_COPY_COST), + VALID_RANGE(0, 1000), DEFAULT(DEFAULT_ROWID_COPY_COST), COST_ADJUST(1000)); + +/* The following costs 
are stored in THD and handler */ + +static Sys_var_optimizer_cost Sys_optimizer_where_cost( + "optimizer_where_cost", + "Cost of checking the row against the WHERE clause. Increasing this will " + "have the optimizer to prefer plans with less row combinations.", + SESSION_VAR(optimizer_where_cost), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 100000), DEFAULT(DEFAULT_WHERE_COST), COST_ADJUST(1000)); + +static Sys_var_optimizer_cost Sys_optimizer_scan_cost( + "optimizer_scan_setup_cost", + "Extra cost added to TABLE and INDEX scans to get optimizer to prefer " + "index lookups.", + SESSION_VAR(optimizer_scan_setup_cost), + CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, 100000000), DEFAULT(DEFAULT_TABLE_SCAN_SETUP_COST), + COST_ADJUST(1000)); diff --git a/sql/sys_vars.inl b/sql/sys_vars.inl index b1d7bc31255..5997446a61e 100644 --- a/sql/sys_vars.inl +++ b/sql/sys_vars.inl @@ -32,6 +32,7 @@ #include "rpl_mi.h" // For Multi-Source Replication #include "debug_sync.h" #include "sql_acl.h" // check_global_access() +#include "optimizer_defaults.h" // create_optimizer_costs /* a set of mostly trivial (as in f(X)=X) defines below to make system variable @@ -40,6 +41,7 @@ #define VALID_RANGE(X,Y) X,Y #define DEFAULT(X) X #define BLOCK_SIZE(X) X +#define COST_ADJUST(X) X #define GLOBAL_VAR(X) sys_var::GLOBAL, (((char*)&(X))-(char*)&global_system_variables), sizeof(X) #define SESSION_VAR(X) sys_var::SESSION, offsetof(SV, X), sizeof(((SV *)0)->X) #define SESSION_ONLY(X) sys_var::ONLY_SESSION, offsetof(SV, X), sizeof(((SV *)0)->X) @@ -1048,7 +1050,7 @@ public: /* If no basename, assume it's for the key cache named 'default' */ if (!base_name->length) - base_name= &default_key_cache_base; + base_name= &default_base; key_cache= get_key_cache(base_name); @@ -1228,6 +1230,143 @@ public: { var->save_result.double_value= getopt_ulonglong2double(option.def_value); } }; + +/* + Optimizer costs + Stored as cost factor (1 cost = 1 ms). 
+ Given and displayed as microseconds (as most values are very small) +*/ + +class Sys_var_optimizer_cost: public Sys_var_double +{ +public: + double cost_adjust; + Sys_var_optimizer_cost(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + double min_val, double max_val, double def_val, + ulong arg_cost_adjust, PolyLock *lock=0, + enum binlog_status_enum binlog_status_arg=VARIABLE_NOT_IN_BINLOG, + on_check_function on_check_func=0, + on_update_function on_update_func=0, + const char *substitute=0) + :Sys_var_double(name_arg, comment, flag_args, off, size, getopt, + min_val, max_val, def_val, lock, + binlog_status_arg, + on_check_func, + on_update_func, + substitute) + { + if (arg_cost_adjust == 1000) + { + show_val_type= SHOW_OPTIMIZER_COST; // For select @@var + option.var_type|= GET_ADJUST_VALUE; + } + cost_adjust= (double) arg_cost_adjust; + global_var(double)= (double)option.def_value/cost_adjust; // To usec + } + bool session_update(THD *thd, set_var *var) + { + session_var(thd, double)= var->save_result.double_value/cost_adjust; + return false; + } + bool global_update(THD *thd, set_var *var) + { + global_var(double)= var->save_result.double_value/cost_adjust; + return false; + } + void session_save_default(THD *thd, set_var *var) + { var->save_result.double_value= global_var(double) * cost_adjust; } + + void global_save_default(THD *thd, set_var *var) + { + var->save_result.double_value= getopt_ulonglong2double(option.def_value)* + cost_adjust; + } +}; + + +/* + The class for optimizer costs with structured names, unique for each engine. 
+ Used as 'engine.variable_name' + + Class specific constructor arguments: + everything derived from Sys_var_optimizer_cost + + Backing store: double + + @note these variables can be only GLOBAL +*/ + +#define COST_VAR(X) GLOBAL_VAR(default_optimizer_costs.X) +#define cost_var_ptr(KC, OFF) (((uchar*)(KC))+(OFF)) +#define cost_var(KC, OFF) (*(double*)cost_var_ptr(KC, OFF)) +typedef bool (*cost_update_function)(THD *, OPTIMIZER_COSTS *, ptrdiff_t, + double, double); + +static bool update_cost(THD *thd, OPTIMIZER_COSTS *key_cache, + ptrdiff_t offset, double new_value, double cost_adjust) +{ + cost_var(key_cache, offset)= new_value / cost_adjust; + return 0; +} + + +class Sys_var_engine_optimizer_cost: public Sys_var_optimizer_cost +{ + cost_update_function cost_update; + public: + Sys_var_engine_optimizer_cost(const char *name_arg, + const char *comment, int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + double min_val, double max_val, double def_val, + long cost_adjust, PolyLock *lock= 0, + cost_update_function on_update_func= update_cost, + const char *substitute=0) + : Sys_var_optimizer_cost(name_arg, comment, flag_args, off, size, + getopt, min_val, max_val, def_val, cost_adjust, + lock, VARIABLE_NOT_IN_BINLOG, 0, + 0, substitute), + cost_update(on_update_func) + { + option.var_type|= GET_ASK_ADDR; + option.value= (uchar**)1; // crash me, please + // fix an offset from global_system_variables to be an offset in KEY_CACHE + offset= global_var_ptr() - (uchar*) &default_optimizer_costs; + SYSVAR_ASSERT(scope() == GLOBAL); + } + bool global_update(THD *thd, set_var *var) + { + double new_value= var->save_result.double_value; + LEX_CSTRING *base_name= &var->base; + OPTIMIZER_COSTS *optimizer_costs; + bool res; + + /* If no basename, assume it's for the default costs */ + if (!base_name->length) + base_name= &default_base; + + mysql_mutex_lock(&LOCK_optimizer_costs); + if (!(optimizer_costs= get_or_create_optimizer_costs(base_name->str, + 
base_name->length))) + { + mysql_mutex_unlock(&LOCK_optimizer_costs); + return true; + } + res= cost_update(thd, optimizer_costs, offset, new_value, cost_adjust); + mysql_mutex_unlock(&LOCK_optimizer_costs); + return res; + } + const uchar *global_value_ptr(THD *thd, const LEX_CSTRING *base) const + { + OPTIMIZER_COSTS *optimizer_costs= get_optimizer_costs(base); + if (!optimizer_costs) + optimizer_costs= &default_optimizer_costs; + return cost_var_ptr(optimizer_costs, offset); + } +}; + + /** The class for the @max_user_connections. It's derived from Sys_var_uint, but non-standard session value diff --git a/sql/table.cc b/sql/table.cc index 5b84cd46152..19a37e0d4d2 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -2290,7 +2290,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, share->keynames.count != keys)) goto err; - /* Allocate handler */ + /* Allocate handler */ if (!(handler_file= get_new_handler(share, thd->mem_root, plugin_hton(se_plugin)))) goto err; @@ -2788,6 +2788,8 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, DBUG_ASSERT((null_pos + (null_bit_pos + 7) / 8) <= share->field[0]->ptr); } + share->primary_key= MAX_KEY; + /* Fix key->name and key_part->field */ if (key_parts) { @@ -2919,6 +2921,11 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, } } + /* Primary key must be set early as engine may use it in index_flag() */ + share->primary_key= (primary_key < MAX_KEY && + share->keys_in_use.is_set(primary_key) ? 
+ primary_key : MAX_KEY); + key_first_info= keyinfo; for (uint key=0 ; key < keys ; key++,keyinfo++) { @@ -3161,7 +3168,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, if (primary_key < MAX_KEY && (share->keys_in_use.is_set(primary_key))) { - share->primary_key= primary_key; + DBUG_ASSERT(share->primary_key == primary_key); /* If we are using an integer as the primary key then allow the user to refer to it as '_rowid' @@ -3178,10 +3185,10 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, } } else - share->primary_key = MAX_KEY; // we do not have a primary key + { + DBUG_ASSERT(share->primary_key == MAX_KEY); + } } - else - share->primary_key= MAX_KEY; if (new_field_pack_flag <= 1) { /* Old file format with default as not null */ @@ -3407,6 +3414,27 @@ err: } +/* + Make a copy of optimizer costs to be able to access these without any locks + and to allow the engine to update costs. +*/ + +void TABLE_SHARE::update_optimizer_costs(handlerton *hton) +{ + if (hton != view_pseudo_hton && !(hton->flags & HTON_HIDDEN)) + { + mysql_mutex_lock(&LOCK_optimizer_costs); + memcpy(&optimizer_costs, hton->optimizer_costs, sizeof(optimizer_costs)); + mysql_mutex_unlock(&LOCK_optimizer_costs); + } + else + { + bzero(&optimizer_costs, sizeof(optimizer_costs)); + MEM_UNDEFINED(&optimizer_costs, sizeof(optimizer_costs)); + } +} + + static bool sql_unusable_for_discovery(THD *thd, handlerton *engine, const char *sql) { @@ -5657,7 +5685,6 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) no_cache= false; initialize_opt_range_structures(); - /* Update optimizer_costs to ensure that a SET STATEMENT of the variables it will work. 
@@ -10418,10 +10445,10 @@ inline void TABLE::initialize_opt_range_structures() } -double TABLE::OPT_RANGE::index_only_fetch_cost(THD *thd) +double TABLE::OPT_RANGE::index_only_fetch_cost(TABLE *table) { - return (index_only_cost + (double) rows * - thd->variables.optimizer_key_copy_cost); + return (index_only_cost + + (double) rows * table->s->optimizer_costs.key_copy_cost); } diff --git a/sql/table.h b/sql/table.h index aa4b5c9a8fd..34514186b5a 100644 --- a/sql/table.h +++ b/sql/table.h @@ -813,6 +813,7 @@ struct TABLE_SHARE return is_view ? view_pseudo_hton : db_plugin ? plugin_hton(db_plugin) : NULL; } + OPTIMIZER_COSTS optimizer_costs; /* Copy of get_optimizer_costs() */ enum row_type row_type; /* How rows are stored */ enum Table_type table_type; enum tmp_table_type tmp_table; @@ -888,6 +889,7 @@ struct TABLE_SHARE bool has_update_default_function; bool can_do_row_logging; /* 1 if table supports RBR */ bool long_unique_table; + bool optimizer_costs_inited; ulong table_map_id; /* for row-based replication */ @@ -1194,6 +1196,7 @@ struct TABLE_SHARE void set_overlapped_keys(); void set_ignored_indexes(); key_map usable_indexes(THD *thd); + void update_optimizer_costs(handlerton *hton); }; /* not NULL, but cannot be dereferenced */ @@ -1420,7 +1423,7 @@ public: Cost of fetching keys with index only read and returning them to the sql level. */ - double index_only_fetch_cost(THD *thd); + double index_only_fetch_cost(TABLE *table); } *opt_range; /* Bitmaps of key parts that =const for the duration of join execution. 
If @@ -1736,6 +1739,12 @@ public: uint actual_n_key_parts(KEY *keyinfo); ulong actual_key_flags(KEY *keyinfo); int update_virtual_field(Field *vf, bool ignore_warnings); + inline size_t key_storage_length(uint index) + { + if (file->is_clustering_key(index)) + return s->stored_rec_length; + return key_info[index].key_length + file->ref_length; + } int update_virtual_fields(handler *h, enum_vcol_update_mode update_mode); int update_default_fields(bool ignore_errors); void evaluate_update_default_function(); diff --git a/sql/uniques.cc b/sql/uniques.cc index a09655bcaca..8555fc21624 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -159,7 +159,7 @@ inline double log2_n_fact(double x) total_buf_elems* log2(n_buffers) * ROWID_COMPARE_COST; */ -static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, +static double get_merge_buffers_cost(THD *thd, uint *buff_elems, uint elem_size, uint *first, uint *last, double compare_factor) { @@ -171,7 +171,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, size_t n_buffers= last - first + 1; /* Using log2(n)=log(n)/log(2) formula */ - return (2*((double)total_buf_elems*elem_size) / IO_SIZE + + return (2*((double)total_buf_elems*elem_size) / IO_SIZE * + default_optimizer_costs.disk_read_cost + total_buf_elems*log((double) n_buffers) * compare_factor / M_LN2); } @@ -185,6 +186,7 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, SYNOPSIS get_merge_many_buffs_cost() + thd THD, used to get disk_read_cost buffer buffer space for temporary data, at least Unique::get_cost_calc_buff_size bytes maxbuffer # of full buffers @@ -203,7 +205,8 @@ static double get_merge_buffers_cost(uint *buff_elems, uint elem_size, Cost of merge in disk seeks. 
*/ -static double get_merge_many_buffs_cost(uint *buffer, +static double get_merge_many_buffs_cost(THD *thd, + uint *buffer, uint maxbuffer, uint max_n_elems, uint last_n_elems, int elem_size, double compare_factor) @@ -231,13 +234,13 @@ static double get_merge_many_buffs_cost(uint *buffer, uint lastbuff= 0; for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF) { - total_cost+=get_merge_buffers_cost(buff_elems, elem_size, + total_cost+=get_merge_buffers_cost(thd, buff_elems, elem_size, buff_elems + i, buff_elems + i + MERGEBUFF-1, compare_factor); lastbuff++; } - total_cost+=get_merge_buffers_cost(buff_elems, elem_size, + total_cost+=get_merge_buffers_cost(thd, buff_elems, elem_size, buff_elems + i, buff_elems + maxbuffer, compare_factor); @@ -246,7 +249,7 @@ static double get_merge_many_buffs_cost(uint *buffer, } /* Simulate final merge_buff call. */ - total_cost += get_merge_buffers_cost(buff_elems, elem_size, + total_cost += get_merge_buffers_cost(thd, buff_elems, elem_size, buff_elems, buff_elems + maxbuffer, compare_factor); return total_cost; @@ -304,7 +307,7 @@ static double get_merge_many_buffs_cost(uint *buffer, these will be random seeks. */ -double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, +double Unique::get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size, size_t max_in_memory_size, double compare_factor, bool intersect_fl, bool *in_memory) @@ -312,7 +315,7 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, size_t max_elements_in_tree; size_t last_tree_elems; size_t n_full_trees; /* number of trees in unique - 1 */ - double result; + double result, disk_read_cost; max_elements_in_tree= ((size_t) max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size)); @@ -345,14 +348,15 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, First, add cost of writing all trees to disk, assuming that all disk writes are sequential. 
*/ - result += DISK_SEEK_BASE_COST * n_full_trees * - ceil(((double) key_size)*max_elements_in_tree / IO_SIZE); - result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE); + disk_read_cost= DISK_READ_COST_THD(thd); + result += disk_read_cost * n_full_trees * + ceil(((double) key_size)*max_elements_in_tree / DISK_CHUNK_SIZE); + result += disk_read_cost * ceil(((double) key_size)*last_tree_elems / DISK_CHUNK_SIZE); /* Cost of merge */ if (intersect_fl) key_size+= sizeof(element_count); - double merge_cost= get_merge_many_buffs_cost(buffer, (uint)n_full_trees, + double merge_cost= get_merge_many_buffs_cost(thd, buffer, (uint)n_full_trees, (uint)max_elements_in_tree, (uint)last_tree_elems, key_size, compare_factor); @@ -361,7 +365,8 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, Add cost of reading the resulting sequence, assuming there were no duplicate elements. */ - result += ceil((double)key_size*nkeys/IO_SIZE); + result+= (ceil((double)key_size*nkeys/IO_SIZE) * + default_optimizer_costs.disk_read_cost); return result; } diff --git a/sql/uniques.h b/sql/uniques.h index f4c45cde095..ecc49794efe 100644 --- a/sql/uniques.h +++ b/sql/uniques.h @@ -78,7 +78,7 @@ public: return log((double) tree_elems) * compare_factor / M_LN2; } - static double get_use_cost(uint *buffer, size_t nkeys, uint key_size, + static double get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size, size_t max_in_memory_size, double compare_factor, bool intersect_fl, bool *in_memory); inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size, diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc index 19a0ffe028a..2a8deb431b1 100644 --- a/storage/archive/ha_archive.cc +++ b/storage/archive/ha_archive.cc @@ -132,7 +132,8 @@ extern "C" PSI_file_key arch_key_file_data; static handler *archive_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root); -int archive_discover(handlerton *hton, 
THD* thd, TABLE_SHARE *share); +static int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share); +static void archive_update_optimizer_costs(OPTIMIZER_COSTS *costs); /* Number of rows that will force a bulk insert. @@ -205,6 +206,7 @@ static const char *ha_archive_exts[] = { NullS }; + int archive_db_init(void *p) { DBUG_ENTER("archive_db_init"); @@ -217,10 +219,10 @@ int archive_db_init(void *p) archive_hton= (handlerton *)p; archive_hton->db_type= DB_TYPE_ARCHIVE_DB; archive_hton->create= archive_create_handler; - archive_hton->flags= HTON_NO_FLAGS; archive_hton->discover_table= archive_discover; archive_hton->tablefile_extensions= ha_archive_exts; - + archive_hton->update_optimizer_costs= archive_update_optimizer_costs; + archive_hton->flags= HTON_NO_FLAGS; DBUG_RETURN(0); } @@ -267,7 +269,7 @@ ha_archive::ha_archive(handlerton *hton, TABLE_SHARE *table_arg) archive_reader_open= FALSE; } -int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share) +static int archive_discover(handlerton *hton, THD* thd, TABLE_SHARE *share) { DBUG_ENTER("archive_discover"); DBUG_PRINT("archive_discover", ("db: '%s' name: '%s'", share->db.str, @@ -1092,6 +1094,54 @@ int ha_archive::index_init(uint keynr, bool sorted) DBUG_RETURN(0); } +#define ARCHIVE_DECOMPRESS_TIME 0.081034543792841 // See optimizer_costs.txt + +static void archive_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + costs->disk_read_ratio= 0.20; // Assume 80 % of data is cached by system + costs->row_lookup_cost= 0; // See rnd_pos_time + costs->key_lookup_cost= 0; // See key_read_time + costs->key_next_find_cost= 0; // Only unique indexes + costs->index_block_copy_cost= 0; +} + + +IO_AND_CPU_COST ha_archive::scan_time() +{ + IO_AND_CPU_COST cost; + ulonglong blocks; + DBUG_ENTER("ha_archive::scan_time"); + + blocks= stats.data_file_length / IO_SIZE; + cost.io= 0; // No cache + cost.cpu= (blocks * DISK_READ_COST * DISK_READ_RATIO + + blocks* ARCHIVE_DECOMPRESS_TIME); + DBUG_RETURN(cost); +} + 
+ + +IO_AND_CPU_COST ha_archive::keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) +{ + IO_AND_CPU_COST cost= scan_time(); + /* + As this is a unique index, assume that we have to scan half the file for + each range to find the row. + */ + cost.cpu= cost.cpu * ranges / 2; + return cost; +} + + +IO_AND_CPU_COST ha_archive::rnd_pos_time(ha_rows rows) +{ + IO_AND_CPU_COST cost; + /* We have to do one azseek() for each row */ + cost.io= rows2double(rows); + cost.cpu= rows * (DISK_READ_COST * DISK_READ_RATIO + ARCHIVE_DECOMPRESS_TIME); + return cost; +} + /* No indexes, so if we get a request for an index search since we tell @@ -1116,8 +1166,6 @@ int ha_archive::index_read_idx(uchar *buf, uint index, const uchar *key, current_k_offset= mkey->key_part->offset; current_key= key; current_key_len= key_len; - - DBUG_ENTER("ha_archive::index_read_idx"); rc= rnd_init(TRUE); diff --git a/storage/archive/ha_archive.h b/storage/archive/ha_archive.h index 2bb5079868b..c96f5d8d122 100644 --- a/storage/archive/ha_archive.h +++ b/storage/archive/ha_archive.h @@ -111,6 +111,10 @@ public: uint max_supported_key_length() const { return sizeof(ulonglong); } uint max_supported_key_part_length() const { return sizeof(ulonglong); } ha_rows records() { return share->rows_recorded; } + IO_AND_CPU_COST scan_time() override; + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) override; + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override; int index_init(uint keynr, bool sorted); virtual int index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag); diff --git a/storage/connect/ha_connect.h b/storage/connect/ha_connect.h index 71ceb7974ba..ca3b69bb552 100644 --- a/storage/connect/ha_connect.h +++ b/storage/connect/ha_connect.h @@ -308,13 +308,18 @@ public: /** @brief Called in test_quick_select to determine if indexes should be used. 
*/ - virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; } + virtual IO_AND_CPU_COST scan_time() + { return { 0, (double) (stats.records+stats.deleted) * avg_io_cost() }; }; /** @brief This method will never be called if you do not implement indexes. */ - virtual double read_time(uint, uint, ha_rows rows) - { return (double) rows / 20.0+1; } + virtual IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) + { + return { 0, (double) rows * 0.001 }; + } + /* Everything below are methods that we implement in ha_connect.cc. diff --git a/storage/csv/ha_tina.h b/storage/csv/ha_tina.h index 043183444da..5a56dc6c4dd 100644 --- a/storage/csv/ha_tina.h +++ b/storage/csv/ha_tina.h @@ -124,7 +124,12 @@ public: /* Called in test_quick_select to determine if indexes should be used. */ - virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; } + virtual IO_AND_CPU_COST scan_time() + { + return { (double) ((share->saved_data_file_length + IO_SIZE-1))/ IO_SIZE * + avg_io_cost(), + (stats.records+stats.deleted) * ROW_NEXT_FIND_COST }; + } /* The next method will never be called */ virtual bool fast_key_read() { return 1;} /* diff --git a/storage/example/ha_example.h b/storage/example/ha_example.h index 2d3fa6d4216..3b11945b182 100644 --- a/storage/example/ha_example.h +++ b/storage/example/ha_example.h @@ -150,15 +150,40 @@ public: uint max_supported_key_length() const { return 0; } /** @brief - Called in test_quick_select to determine if indexes should be used. + Called in test_quick_select to determine cost of table scan */ - virtual double scan_time() { return (double) (stats.records+stats.deleted) / 20.0+10; } + virtual IO_AND_CPU_COST scan_time() + { + IO_AND_CPU_COST cost; + /* 0 blocks, 0.001 ms / row */ + cost.io= (double) (stats.records+stats.deleted) * avg_io_cost(); + cost.cpu= 0; + return cost; + } /** @brief This method will never be called if you do not implement indexes. 
*/ - virtual double read_time(uint, uint, ha_rows rows) - { return (double) rows / 20.0+1; } + virtual IO_AND_CPU_COST keyread_time(uint, ulong, ha_rows rows, + ulonglong blocks) + { + IO_AND_CPU_COST cost; + cost.io= blocks * avg_io_cost(); + cost.cpu= (double) rows * 0.001; + return cost; + } + + /** @brief + Cost of fetching 'rows' records through rnd_pos() + */ + virtual IO_AND_CPU_COST rnd_pos_time(ha_rows rows) + { + IO_AND_CPU_COST cost; + /* 0 blocks, 0.001 ms / row */ + cost.io= 0; + cost.cpu= (double) rows * avg_io_cost(); + return cost; + } /* Everything below are methods that we implement in ha_example.cc. diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc index 76708e80105..2a375a41200 100644 --- a/storage/federated/ha_federated.cc +++ b/storage/federated/ha_federated.cc @@ -460,6 +460,20 @@ static void init_federated_psi_keys(void) #endif /* HAVE_PSI_INTERFACE */ /* + Federated doesn't need costs.disk_read_ratio as everything is one a + remote server and nothing is cached locally +*/ + +static void federated_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + /* + Setting disk_read_ratios to 1.0, ensures we are using the costs + from rnd_pos_time() and scan_time() + */ + costs->disk_read_ratio= 1.0; +} + +/* Initialize the federated handler. 
SYNOPSIS @@ -485,6 +499,7 @@ int federated_db_init(void *p) federated_hton->rollback= federated_rollback; federated_hton->create= federated_create_handler; federated_hton->drop_table= [](handlerton *, const char*) { return -1; }; + federated_hton->update_optimizer_costs= federated_update_optimizer_costs; federated_hton->flags= HTON_ALTER_NOT_SUPPORTED | HTON_NO_PARTITION; /* @@ -905,20 +920,11 @@ ha_federated::ha_federated(handlerton *hton, :handler(hton, table_arg), mysql(0), stored_result(0) { - optimizer_cache_cost= 1; trx_next= 0; bzero(&bulk_insert, sizeof(bulk_insert)); } /* - Federated doesn't need optimizer_cache_cost as everything is one a - remote server and nothing is cached locally -*/ - -void ha_federated::set_optimizer_cache_cost(double cost) -{} - -/* Convert MySQL result set row to handler internal format SYNOPSIS diff --git a/storage/federated/ha_federated.h b/storage/federated/ha_federated.h index 3f25c2d7cb9..35e5f5c8215 100644 --- a/storage/federated/ha_federated.h +++ b/storage/federated/ha_federated.h @@ -180,20 +180,25 @@ public: The reason for "records * 1000" is that such a large number forces this to use indexes " */ - virtual double scan_time() + + IO_AND_CPU_COST scan_time() { DBUG_PRINT("info", ("records %lu", (ulong) stats.records)); - return (double)(stats.records*1000); + return + { + (double) (stats.mean_rec_length * stats.records)/IO_SIZE * avg_io_cost(), + 0 + }; } - virtual double read_time(uint index, uint ranges, ha_rows rows) + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) { - return rows2double(rows) + rows2double(ranges); + return { (double) stats.records * avg_io_cost(), 0 }; } - virtual double rnd_pos_time(ha_rows rows) + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) { - return rows2double(rows); + return { (double) (ranges + rows) * avg_io_cost(), 0 }; } - virtual void set_optimizer_cache_cost(double cost); const key_map *keys_to_use_for_scanning() { return &key_map_full; } /* diff 
--git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index 22e22958a64..b93b7a94016 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -411,6 +411,20 @@ static select_handler* create_federatedx_select_handler(THD* thd, SELECT_LEX *sel); /* + Federated doesn't need costs.disk_read_ratio as everything is one a remote + server and nothing is cached locally +*/ + +static void federatedx_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + /* + Setting disk_read_ratios to 1.0, ensures we are using the costs + from rnd_pos_time() and scan_time() + */ + costs->disk_read_ratio= 0.0; +} + +/* Initialize the federatedx handler. SYNOPSIS @@ -442,6 +456,7 @@ int federatedx_db_init(void *p) federatedx_hton->flags= HTON_ALTER_NOT_SUPPORTED; federatedx_hton->create_derived= create_federatedx_derived_handler; federatedx_hton->create_select= create_federatedx_select_handler; + federatedx_hton->update_optimizer_costs= federatedx_update_optimizer_costs; if (mysql_mutex_init(fe_key_mutex_federatedx, &federatedx_mutex, MY_MUTEX_INIT_FAST)) @@ -841,17 +856,9 @@ ha_federatedx::ha_federatedx(handlerton *hton, :handler(hton, table_arg), txn(0), io(0), stored_result(0) { - optimizer_cache_cost= 1; bzero(&bulk_insert, sizeof(bulk_insert)); } -/* - Federated doesn't need optimizer_cache_cost as everything is one a remote server and - nothing is cached locally -*/ - -void ha_federatedx::set_optimizer_cache_cost(double cost) -{} /* Convert MySQL result set row to handler internal format diff --git a/storage/federatedx/ha_federatedx.h b/storage/federatedx/ha_federatedx.h index a7b7833eae7..9d9267e3abf 100644 --- a/storage/federatedx/ha_federatedx.h +++ b/storage/federatedx/ha_federatedx.h @@ -367,20 +367,24 @@ public: The reason for "records * 1000" is that such a large number forces this to use indexes " */ - double scan_time() + IO_AND_CPU_COST scan_time() { DBUG_PRINT("info", ("records %lu", (ulong) stats.records)); - 
return (double)(stats.records*1000); + return + { + (double) (stats.mean_rec_length * stats.records)/8192 * avg_io_cost(), + 0 + }; } - double read_time(uint index, uint ranges, ha_rows rows) + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) { - return rows2double(rows) + rows2double(ranges); + return { (double) (ranges + rows) * avg_io_cost(), 0 }; } - virtual double rnd_pos_time(ha_rows rows) + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) { - return rows2double(rows); + return { (double) rows * avg_io_cost(), 0 }; } - virtual void set_optimizer_cache_cost(double cost); const key_map *keys_to_use_for_scanning() { return &key_map_full; } /* diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc index f9b365cf91e..cc7dc79e508 100644 --- a/storage/heap/ha_heap.cc +++ b/storage/heap/ha_heap.cc @@ -42,6 +42,28 @@ static int heap_drop_table(handlerton *hton, const char *path) return error == ENOENT ? -1 : error; } +/* See optimizer_costs.txt for how the following values where calculated */ +#define HEAP_ROW_NEXT_FIND_COST 8.0166e-06 // For table scan +#define BTREE_KEY_NEXT_FIND_COST 0.00007739 // For binary tree scan +#define HEAP_LOOKUP_COST 0.00016097 // Heap lookup cost + +static void heap_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + /* + A lot of values are 0 as heap supports all needed xxx_time() functions + */ + costs->disk_read_cost=0; // All data in memory + costs->disk_read_ratio= 0.0; // All data in memory + costs->key_next_find_cost= 0; + costs->key_copy_cost= 0; // Set in keyread_time() + costs->row_copy_cost= 2.334e-06; // This is small as its just a memcpy + costs->row_lookup_cost= 0; // Direct pointer + costs->row_next_find_cost= 0; + costs->key_lookup_cost= 0; + costs->key_next_find_cost= 0; + costs->index_block_copy_cost= 0; +} + int heap_init(void *p) { handlerton *heap_hton; @@ -53,6 +75,7 @@ int heap_init(void *p) heap_hton->create= heap_create_handler; heap_hton->panic= heap_panic; 
heap_hton->drop_table= heap_drop_table; + heap_hton->update_optimizer_costs= heap_update_optimizer_costs; heap_hton->flags= HTON_CAN_RECREATE; return 0; @@ -74,7 +97,6 @@ ha_heap::ha_heap(handlerton *hton, TABLE_SHARE *table_arg) :handler(hton, table_arg), file(0), records_changed(0), key_stat_version(0), internal_table(0) { - optimizer_cache_cost= 1.0; } /* @@ -230,6 +252,41 @@ void ha_heap::update_key_stats() } +IO_AND_CPU_COST ha_heap::keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) +{ + KEY *key=table->key_info+index; + if (key->algorithm == HA_KEY_ALG_BTREE) + { + double lookup_cost; + lookup_cost= ranges * costs->key_cmp_cost * log2(stats.records+1); + return {0, ranges * lookup_cost + (rows-ranges) * BTREE_KEY_NEXT_FIND_COST }; + } + else + { + return {0, (ranges * HEAP_LOOKUP_COST + + (rows-ranges) * BTREE_KEY_NEXT_FIND_COST) }; + } +} + + +IO_AND_CPU_COST ha_heap::scan_time() +{ + return {0, (double) (stats.records+stats.deleted) * HEAP_ROW_NEXT_FIND_COST }; +} + + +IO_AND_CPU_COST ha_heap::rnd_pos_time(ha_rows rows) +{ + /* + The row pointer is a direct pointer to the block. Thus almost instant + in practice. 
+ Note that ha_rnd_pos_time() will add ROW_COPY_COST to this result + */ + return { 0, 0 }; +} + + int ha_heap::write_row(const uchar * buf) { int res; diff --git a/storage/heap/ha_heap.h b/storage/heap/ha_heap.h index 81978daa5d7..74a0a00a04c 100644 --- a/storage/heap/ha_heap.h +++ b/storage/heap/ha_heap.h @@ -62,22 +62,13 @@ public: const key_map *keys_to_use_for_scanning() { return &btree_keys; } uint max_supported_keys() const { return MAX_KEY; } uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; } - double scan_time() override - { return (double) (stats.records+stats.deleted) / 20.0+10; } - double read_time(uint index, uint ranges, ha_rows rows) override - { return (double) (rows +1)/ 20.0; } - double keyread_time(uint index, uint ranges, ha_rows rows) override - { return (double) (rows + ranges) / 20.0 ; } - double avg_io_cost() - { return 0.05; } /* 1/20 */ - double rnd_pos_time(ha_rows rows) override - { - return (double) rows/ 20.0; - } - /* - Heap doesn't need optimizer_cache_cost as everything is in memory and - it supports all needed _time() functions - */ + IO_AND_CPU_COST scan_time() override; + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) override; + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override; + /* 0 for avg_io_cost ensures that there are no read-block calculations */ + double avg_io_cost() override { return 0.0; } + int open(const char *name, int mode, uint test_if_locked); int close(void); void set_keys_for_scanning(void); @@ -88,10 +79,6 @@ public: ulonglong nb_desired_values, ulonglong *first_value, ulonglong *nb_reserved_values); - void set_optimizer_cache_cost(double cost) override - { - optimizer_cache_cost= 1.0; - } int index_read_map(uchar * buf, const uchar * key, key_part_map keypart_map, enum ha_rkey_function find_flag); int index_read_last_map(uchar *buf, const uchar *key, key_part_map keypart_map); diff --git a/storage/innobase/btr/btr0cur.cc 
b/storage/innobase/btr/btr0cur.cc index 5d796cb9348..875253f931a 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -6501,6 +6501,7 @@ search_loop: DBUG_EXECUTE_IF("bug14007649", DBUG_RETURN(n_rows);); +#ifdef NOT_USED /* Do not estimate the number of rows in the range to over 1 / 2 of the estimated rows in the whole table */ @@ -6515,6 +6516,10 @@ search_loop: if (n_rows == 0) n_rows= table_n_rows; } +#else + if (n_rows > table_n_rows) + n_rows= table_n_rows; +#endif DBUG_RETURN(n_rows); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 7f88b42e08f..5fa31017d24 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4079,6 +4079,26 @@ static int innodb_init_params() DBUG_RETURN(0); } + +/*********************************************************************//** +Setup costs factors for InnoDB to be able to approximate how many +ms different opperations takes. See cost functions in handler.h how +the different variables are used */ + +static void innobase_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + /* + The following number was found by check_costs.pl when using 1M rows + and all rows are cached. See optimizer_costs.txt for details + */ + costs->row_next_find_cost= 0.00007013; + costs->row_lookup_cost= 0.00076597; + costs->key_next_find_cost= 0.00009900; + costs->key_lookup_cost= 0.00079112; + costs->row_copy_cost= 0.00006087; +} + + /** Initialize the InnoDB storage engine plugin. 
@param[in,out] p InnoDB handlerton @return error code @@ -4146,6 +4166,8 @@ static int innodb_init(void* p) innobase_hton->prepare_commit_versioned = innodb_prepare_commit_versioned; + innobase_hton->update_optimizer_costs= innobase_update_optimizer_costs; + innodb_remember_check_sysvar_funcs(); compile_time_assert(DATA_MYSQL_TRUE_VARCHAR == MYSQL_TYPE_VARCHAR); @@ -5079,10 +5101,10 @@ ha_innobase::index_flags( } ulong flags= key == table_share->primary_key - ? HA_CLUSTERED_INDEX : 0; + ? HA_CLUSTERED_INDEX : HA_KEYREAD_ONLY; flags |= HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER - | HA_READ_RANGE | HA_KEYREAD_ONLY + | HA_READ_RANGE | HA_DO_INDEX_COND_PUSHDOWN | HA_DO_RANGE_FILTER_PUSHDOWN; @@ -14336,13 +14358,15 @@ ha_innobase::estimate_rows_upper_bound() DBUG_RETURN((ha_rows) estimate); } + /*********************************************************************//** How many seeks it will take to read through the table. This is to be comparable to the number returned by records_in_range so that we can decide if we should scan the table or use keys. @return estimated time measured in disk seeks */ -double +#ifdef NOT_USED +IO_AND_CPU_COST ha_innobase::scan_time() /*====================*/ { @@ -14362,24 +14386,28 @@ ha_innobase::scan_time() TODO: This will be further improved to return some approximate estimate but that would also needs pre-population of stats structure. As of now approach is in sync with MyISAM. 
*/ - return(ulonglong2double(stats.data_file_length) / IO_SIZE + 2); + return { (ulonglong2double(stats.data_file_length) / IO_SIZE * avg_io_cost()), 0.0 }; } ulint stat_clustered_index_size; - + IO_AND_CPU_COST cost; ut_a(m_prebuilt->table->stat_initialized); stat_clustered_index_size = m_prebuilt->table->stat_clustered_index_size; - return((double) stat_clustered_index_size); + cost.io= (double) stat_clustered_index_size * avg_io_cost(); + cost.cpu= 0; + return(cost); } +#endif /******************************************************************//** Calculate the time it takes to read a set of ranges through an index This enables us to optimise reads for clustered indexes. @return estimated time measured in disk seeks */ +#ifdef NOT_USED double ha_innobase::read_time( /*===================*/ @@ -14404,14 +14432,14 @@ ha_innobase::read_time( return(time_for_scan); } - return(ranges + (double) rows / (double) total_rows * time_for_scan); + return(ranges * KEY_LOOKUP_COST + (double) rows / (double) total_rows * time_for_scan); } /******************************************************************//** Calculate the time it takes to read a set of rows with primary key. */ -double +IO_AND_CPU_COST ha_innobase::rnd_pos_time(ha_rows rows) { ha_rows total_rows; @@ -14419,15 +14447,18 @@ ha_innobase::rnd_pos_time(ha_rows rows) /* Assume that the read time is proportional to the scan time for all rows + at most one seek per range. */ - double time_for_scan = scan_time(); + IO_AND_CPU_COST time_for_scan = scan_time(); if ((total_rows = estimate_rows_upper_bound()) < rows) { return(time_for_scan); } - - return((double) rows + (double) rows / (double) total_rows * time_for_scan); + double frac= (double) rows + (double) rows / (double) total_rows; + time_for_scan.io*= frac; + time_for_scan.cpu*= frac; + return(time_for_scan); } +#endif /*********************************************************************//** Calculates the key number used inside MySQL for an Innobase index. 
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index ec466bbc30a..87e730dc137 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -105,12 +105,10 @@ public: int close(void) override; - double scan_time() override; - - double read_time(uint index, uint ranges, ha_rows rows) override; - +#ifdef NOT_USED + IO_AND_CPU_COST scan_time() override; double rnd_pos_time(ha_rows rows) override; - +#endif int write_row(const uchar * buf) override; int update_row(const uchar * old_data, const uchar * new_data) override; diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index abe7834f36d..275df557dbd 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -1100,14 +1100,44 @@ ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const } -double ha_maria::scan_time() +/* + Update costs that are unique for this TABLE instance +*/ + +void ha_maria::update_optimizer_costs(OPTIMIZER_COSTS *costs) { - if (file->s->data_file_type == BLOCK_RECORD) - return (ulonglong2double(stats.data_file_length - file->s->block_size) / - file->s->block_size) + 2; - return handler::scan_time(); + /* + Default costs for Aria with BLOCK_FORMAT is the same as MariaDB default + costs. + */ + if (file->s->data_file_type != BLOCK_RECORD) + { + /* + MyISAM format row lookup costs are slow as the row data is on a not + cached file. Costs taken from ha_myisam.cc + */ + costs->row_next_find_cost= 0.000063539; + costs->row_lookup_cost= 0.001014818; + } } + +IO_AND_CPU_COST ha_maria::rnd_pos_time(ha_rows rows) +{ + IO_AND_CPU_COST cost= handler::rnd_pos_time(rows); + /* file may be 0 if this is an internal temporary file that is not yet opened */ + if (file && file->s->data_file_type != BLOCK_RECORD) + { + /* + Row data is not cached. costs.row_lookup_cost includes the cost of + the reading the row from system (probably cached by the OS). 
+ */ + cost.io= 0; + } + return cost; +} + + /* We need to be able to store at least 2 keys on an index page as the splitting algorithms depends on this. (With only one key on a page @@ -3788,6 +3818,12 @@ bool ha_maria::is_changed() const return file->state->changed; } +static void aria_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + costs->rowid_copy_cost= 0.000001; // Just a short memcopy + costs->rowid_cmp_cost= 0.000001; // Just a short memcmp +} + static int ha_maria_init(void *p) { @@ -3820,6 +3856,7 @@ static int ha_maria_init(void *p) maria_hton->show_status= maria_show_status; maria_hton->prepare_for_backup= maria_prepare_for_backup; maria_hton->end_backup= maria_end_backup; + maria_hton->update_optimizer_costs= aria_update_optimizer_costs; /* TODO: decide if we support Maria being used for log tables */ maria_hton->flags= (HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES | diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h index 6b4302145dd..38919d5c542 100644 --- a/storage/maria/ha_maria.h +++ b/storage/maria/ha_maria.h @@ -77,8 +77,6 @@ public: { return max_supported_key_length(); } enum row_type get_row_type() const override final; void change_table_ptr(TABLE *table_arg, TABLE_SHARE *share) override final; - virtual double scan_time() override final; - int open(const char *name, int mode, uint test_if_locked) override; int close(void) override final; int write_row(const uchar * buf) override; @@ -114,6 +112,8 @@ public: int remember_rnd_pos() override final; int restart_rnd_next(uchar * buf) override final; void position(const uchar * record) override final; + void update_optimizer_costs(OPTIMIZER_COSTS *costs) override final; + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override final; int info(uint) override final; int info(uint, my_bool); int extra(enum ha_extra_function operation) override final; diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 144b10a86da..2f187090f53 100644 --- 
a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -3876,7 +3876,7 @@ restart: { pagecache_pthread_mutex_unlock(&pagecache->cache_lock); DBUG_ASSERT(0); - return (uchar*) 0; + DBUG_RETURN((uchar*) 0); } } /* diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index 7787f8b83b5..a6693924d9e 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -13008,9 +13008,9 @@ int ha_mroonga::truncate() DBUG_RETURN(error); } -double ha_mroonga::wrapper_scan_time() +IO_AND_CPU_COST ha_mroonga::wrapper_scan_time() { - double res; + IO_AND_CPU_COST res; MRN_DBUG_ENTER_METHOD(); MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); @@ -13020,17 +13020,16 @@ double ha_mroonga::wrapper_scan_time() DBUG_RETURN(res); } -double ha_mroonga::storage_scan_time() +IO_AND_CPU_COST ha_mroonga::storage_scan_time() { MRN_DBUG_ENTER_METHOD(); - double time = handler::scan_time(); - DBUG_RETURN(time); + DBUG_RETURN(handler::scan_time()); } -double ha_mroonga::scan_time() +IO_AND_CPU_COST ha_mroonga::scan_time() { MRN_DBUG_ENTER_METHOD(); - double time; + IO_AND_CPU_COST time; if (share->wrapper_mode) { time = wrapper_scan_time(); @@ -13040,51 +13039,87 @@ double ha_mroonga::scan_time() DBUG_RETURN(time); } -double ha_mroonga::wrapper_read_time(uint index, uint ranges, ha_rows rows) +IO_AND_CPU_COST ha_mroonga::wrapper_rnd_pos_time(ha_rows rows) +{ + IO_AND_CPU_COST res; + MRN_DBUG_ENTER_METHOD(); + MRN_SET_WRAP_SHARE_KEY(share, table->s); + MRN_SET_WRAP_TABLE_KEY(this, table); + res = wrap_handler->rnd_pos_time(rows); + MRN_SET_BASE_SHARE_KEY(share, table->s); + MRN_SET_BASE_TABLE_KEY(this, table); + DBUG_RETURN(res); +} + +IO_AND_CPU_COST ha_mroonga::storage_rnd_pos_time(ha_rows rows) { - double res; + MRN_DBUG_ENTER_METHOD(); + IO_AND_CPU_COST time = handler::rnd_pos_time(rows); + DBUG_RETURN(time); +} + + +IO_AND_CPU_COST ha_mroonga::rnd_pos_time(ha_rows rows) +{ + MRN_DBUG_ENTER_METHOD(); + 
IO_AND_CPU_COST time; + if (share->wrapper_mode) + { + time = wrapper_rnd_pos_time(rows); + } else { + time = storage_rnd_pos_time(rows); + } + DBUG_RETURN(time); +} + + +IO_AND_CPU_COST ha_mroonga::wrapper_keyread_time(uint index, ulong ranges, + ha_rows rows, ulonglong blocks) +{ + IO_AND_CPU_COST res; MRN_DBUG_ENTER_METHOD(); if (index < MAX_KEY) { KEY *key_info = &(table->key_info[index]); if (mrn_is_geo_key(key_info)) { - res = handler::read_time(index, ranges, rows); + res = handler::keyread_time(index, ranges, rows, blocks); DBUG_RETURN(res); } MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); - res = wrap_handler->read_time(share->wrap_key_nr[index], ranges, rows); + res = wrap_handler->keyread_time(share->wrap_key_nr[index], ranges, rows, blocks); MRN_SET_BASE_SHARE_KEY(share, table->s); MRN_SET_BASE_TABLE_KEY(this, table); } else { MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); - res = wrap_handler->read_time(index, ranges, rows); + res = wrap_handler->keyread_time(index, ranges, rows, blocks); MRN_SET_BASE_SHARE_KEY(share, table->s); MRN_SET_BASE_TABLE_KEY(this, table); } DBUG_RETURN(res); } -double ha_mroonga::storage_read_time(uint index, uint ranges, ha_rows rows) +IO_AND_CPU_COST ha_mroonga::storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) { MRN_DBUG_ENTER_METHOD(); - double time = handler::read_time(index, ranges, rows); + IO_AND_CPU_COST time = handler::keyread_time(index, ranges, rows, blocks); DBUG_RETURN(time); } -double ha_mroonga::read_time(uint index, uint ranges, ha_rows rows) +IO_AND_CPU_COST ha_mroonga::keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) { MRN_DBUG_ENTER_METHOD(); - double time; + IO_AND_CPU_COST time; if (share->wrapper_mode) { - time = wrapper_read_time(index, ranges, rows); + time = wrapper_keyread_time(index, ranges, rows, blocks); } else { - time = storage_read_time(index, ranges, rows); + time = 
storage_keyread_time(index, ranges, rows, blocks); } DBUG_RETURN(time); } + #ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING const key_map *ha_mroonga::wrapper_keys_to_use_for_scanning() { diff --git a/storage/mroonga/ha_mroonga.hpp b/storage/mroonga/ha_mroonga.hpp index 66767899e21..827714f5edb 100644 --- a/storage/mroonga/ha_mroonga.hpp +++ b/storage/mroonga/ha_mroonga.hpp @@ -531,8 +531,9 @@ public: int end_bulk_insert() mrn_override; int delete_all_rows() mrn_override; int truncate() mrn_override; - double scan_time() mrn_override; - double read_time(uint index, uint ranges, ha_rows rows) mrn_override; + IO_AND_CPU_COST scan_time() mrn_override; + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) mrn_override; + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks) mrn_override; #ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING const key_map *keys_to_use_for_scanning() mrn_override; #endif @@ -1106,10 +1107,12 @@ private: int wrapper_truncate_index(); int storage_truncate(); int storage_truncate_index(); - double wrapper_scan_time(); - double storage_scan_time(); - double wrapper_read_time(uint index, uint ranges, ha_rows rows); - double storage_read_time(uint index, uint ranges, ha_rows rows); + IO_AND_CPU_COST wrapper_scan_time(); + IO_AND_CPU_COST storage_scan_time(); + IO_AND_CPU_COST wrapper_rnd_pos_time(ha_rows rows); + IO_AND_CPU_COST storage_rnd_pos_time(ha_rows rows); + IO_AND_CPU_COST wrapper_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks); + IO_AND_CPU_COST storage_keyread_time(uint index, ulong ranges, ha_rows rows, ulonglong blocks); #ifdef MRN_HANDLER_HAVE_KEYS_TO_USE_FOR_SCANNING const key_map *wrapper_keys_to_use_for_scanning(); const key_map *storage_keys_to_use_for_scanning(); diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index a1de9edd997..bbae99ce2d3 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -804,6 +804,17 @@ ulong 
ha_myisam::index_flags(uint inx, uint part, bool all_parts) const return flags; } +IO_AND_CPU_COST ha_myisam::rnd_pos_time(ha_rows rows) +{ + IO_AND_CPU_COST cost= handler::rnd_pos_time(rows); + /* + Row data is not cached. costs.row_lookup_cost includes the cost of + the reading the row from system (probably cached by the OS). + */ + cost.io= 0; + return cost; +} + /* Name is here without an extension */ int ha_myisam::open(const char *name, int mode, uint test_if_locked) @@ -2577,6 +2588,22 @@ static int myisam_drop_table(handlerton *hton, const char *path) return mi_delete_table(path); } + +void myisam_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + /* + MyISAM row lookup costs are slow as the row data is not cached + The following numbers where found by check_costs.pl when using 1M rows + and all rows are cached. See optimizer_costs.txt + */ + costs->row_next_find_cost= 0.000063539; + costs->row_lookup_cost= 0.001014818; + costs->key_next_find_cost= 0.000090585; + costs->key_lookup_cost= 0.000550142; + costs->key_copy_cost= 0.000015685; +} + + static int myisam_init(void *p) { handlerton *hton; @@ -2596,6 +2623,7 @@ static int myisam_init(void *p) hton->create= myisam_create_handler; hton->drop_table= myisam_drop_table; hton->panic= myisam_panic; + hton->update_optimizer_costs= myisam_update_optimizer_costs; hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES; hton->tablefile_extensions= ha_myisam_exts; mi_killed= mi_killed_in_mariadb; diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 3843004cc6e..55ce19494d9 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -82,14 +82,14 @@ class ha_myisam final : public handler int index_first(uchar * buf); int index_last(uchar * buf); int index_next_same(uchar *buf, const uchar *key, uint keylen); - int ft_init() + int ft_init() override { if (!ft_handler) return 1; ft_handler->please->reinit_search(ft_handler); return 0; } - FT_INFO *ft_init_ext(uint flags, uint 
inx,String *key) + FT_INFO *ft_init_ext(uint flags, uint inx,String *key) override { return ft_init_search(flags,file,inx, (uchar *)key->ptr(), key->length(), key->charset(), @@ -102,6 +102,7 @@ class ha_myisam final : public handler int remember_rnd_pos(); int restart_rnd_next(uchar *buf); void position(const uchar *record); + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override; int info(uint); int extra(enum ha_extra_function operation); int extra_opt(enum ha_extra_function operation, ulong cache_size); diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc index d37636abab7..8a1a24d8b82 100644 --- a/storage/myisammrg/ha_myisammrg.cc +++ b/storage/myisammrg/ha_myisammrg.cc @@ -339,6 +339,32 @@ static void myrg_set_external_ref(MYRG_INFO *m_info, void *ext_ref_arg) } } +IO_AND_CPU_COST ha_myisammrg::rnd_pos_time(ha_rows rows) +{ + IO_AND_CPU_COST cost= handler::rnd_pos_time(rows); + /* + Row data is notcached. costs.row_lookup_cost includes the cost of + the reading the row from system (probably cached by the OS). + */ + cost.io= 0; + return cost; +} + +IO_AND_CPU_COST ha_myisammrg::keyread_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) +{ + IO_AND_CPU_COST cost= handler::keyread_time(index, ranges, rows, blocks); + if (!blocks) + { + cost.io*= file->tables; + cost.cpu*= file->tables; + } + /* Add the cost of having to do a key lookup in all trees */ + cost.cpu+= (file->tables-1) * (ranges * KEY_LOOKUP_COST); + return cost; +} + /** Open a MERGE parent table, but not its children. 
@@ -1744,6 +1770,12 @@ int myisammrg_panic(handlerton *hton, ha_panic_function flag) return myrg_panic(flag); } +static void myisammrg_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + myisam_update_optimizer_costs(costs); +} + + static int myisammrg_init(void *p) { handlerton *myisammrg_hton; @@ -1759,7 +1791,7 @@ static int myisammrg_init(void *p) myisammrg_hton->panic= myisammrg_panic; myisammrg_hton->flags= HTON_NO_PARTITION; myisammrg_hton->tablefile_extensions= ha_myisammrg_exts; - + myisammrg_hton->update_optimizer_costs= myisammrg_update_optimizer_costs; return 0; } diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h index 6da327ec84b..6ccf29c7042 100644 --- a/storage/myisammrg/ha_myisammrg.h +++ b/storage/myisammrg/ha_myisammrg.h @@ -102,9 +102,17 @@ public: uint max_supported_keys() const { return MI_MAX_KEY; } uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } - double scan_time() - { return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; } - + IO_AND_CPU_COST scan_time() override + { + IO_AND_CPU_COST cost; + cost.io= (ulonglong2double(stats.data_file_length) / IO_SIZE + + file->tables) * avg_io_cost(); + cost.cpu= records() * ROW_NEXT_FIND_COST; + return cost; + } + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) override; + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) override; int open(const char *name, int mode, uint test_if_locked); int add_children_list(void); int attach_children(void); diff --git a/storage/oqgraph/ha_oqgraph.h b/storage/oqgraph/ha_oqgraph.h index c8e175df616..d1f5a898ad7 100644 --- a/storage/oqgraph/ha_oqgraph.h +++ b/storage/oqgraph/ha_oqgraph.h @@ -74,9 +74,10 @@ public: const char **bas_ext() const; uint max_supported_keys() const { return MAX_KEY; } uint max_supported_key_part_length() const { return MAX_KEY_LENGTH; } - double scan_time() { return 
(double) 1000000000; } - double read_time(uint index, uint ranges, ha_rows rows) - { return 1; } + IO_AND_CPU_COST scan_time() + { return { (double) 1000000000, (double) 1000000000 }; } + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) + { return { (double) rows, (double) rows }; } // Doesn't make sense to change the engine on a virtual table. virtual bool can_switch_engines() { return false; } diff --git a/storage/perfschema/ha_perfschema.h b/storage/perfschema/ha_perfschema.h index eab97434265..20ed7448a1e 100644 --- a/storage/perfschema/ha_perfschema.h +++ b/storage/perfschema/ha_perfschema.h @@ -104,8 +104,10 @@ public: ha_rows estimate_rows_upper_bound(void) { return HA_POS_ERROR; } - double scan_time(void) - { return 1.0; } + IO_AND_CPU_COST scan_time(void) + { + return {0.0, 1.0}; + } /** Open a performance schema table. diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 2fcd729af6d..278732c6832 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -14627,15 +14627,18 @@ bool ha_rocksdb::use_read_free_rpl() const { } #endif // MARIAROCKS_NOT_YET -double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) { +IO_AND_CPU_COST ha_rocksdb::keyread_time(uint index, ulong ranges, + ha_rows rows, + ulonglong blocks) { DBUG_ENTER_FUNC(); if (index != table->s->primary_key) { /* Non covering index range scan */ - DBUG_RETURN(handler::read_time(index, ranges, rows)); + DBUG_RETURN(handler::keyread_time(index, ranges, rows, blocks)); } - DBUG_RETURN((rows / 20.0) + 1); + IO_AND_CPU_COST cost= {0, (rows / 20.0) + ranges }; + DBUG_RETURN(cost); } void ha_rocksdb::print_error(int error, myf errflag) { diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index 63bf7ffd602..d40fc539b0c 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -623,14 +623,17 @@ public: bool sorted) override MY_ATTRIBUTE((__warn_unused_result__)); - virtual double scan_time() override { + 
virtual IO_AND_CPU_COST scan_time() override + { + IO_AND_CPU_COST cost; DBUG_ENTER_FUNC(); - - DBUG_RETURN( - static_cast<double>((stats.records + stats.deleted) / 20.0 + 10)); + cost.io= 0; + cost.cpu= (stats.records + stats.deleted) * 0.001 + 1; + DBUG_RETURN(cost); } + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, + ha_rows rows, ulonglong blocks) override; - virtual double read_time(uint, uint, ha_rows rows) override; virtual void print_error(int error, myf errflag) override; int open(const char *const name, int mode, uint test_if_locked) override diff --git a/storage/sequence/sequence.cc b/storage/sequence/sequence.cc index f5a18094521..eb79d25630c 100644 --- a/storage/sequence/sequence.cc +++ b/storage/sequence/sequence.cc @@ -100,9 +100,7 @@ public: int index_last(uchar *buf); ha_rows records_in_range(uint inx, const key_range *start_key, const key_range *end_key, page_range *pages); - double scan_time() { return (double)nvalues(); } - double read_time(uint index, uint ranges, ha_rows rows) { return (double)rows; } - double keyread_time(uint index, uint ranges, ha_rows rows) { return (double)rows; } + double avg_io_cost() override { return 0.0; } private: void set(uchar *buf); @@ -492,6 +490,13 @@ int ha_seq_group_by_handler::next_row() DBUG_RETURN(0); } +static void sequence_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + costs->disk_read_ratio= 0.0; // No disk + costs->key_next_find_cost= costs->key_lookup_cost= + costs->key_copy_cost= costs->row_lookup_cost= + costs->row_copy_cost= 0.0000062391530550; +} /***************************************************************************** Initialize the interface between the sequence engine and MariaDB @@ -518,6 +523,7 @@ static int init(void *p) hton->savepoint_set= hton->savepoint_rollback= hton->savepoint_release= dummy_savepoint; hton->create_group_by= create_group_by_handler; + hton->update_optimizer_costs= sequence_update_optimizer_costs; return 0; } diff --git a/storage/sphinx/ha_sphinx.h 
b/storage/sphinx/ha_sphinx.h index f03e9d8c797..0b3883f107c 100644 --- a/storage/sphinx/ha_sphinx.h +++ b/storage/sphinx/ha_sphinx.h @@ -72,14 +72,28 @@ public: uint max_supported_key_length () const { return MAX_KEY_LENGTH; } uint max_supported_key_part_length () const { return MAX_KEY_LENGTH; } - #if MYSQL_VERSION_ID>50100 - virtual double scan_time () { return (double)( stats.records+stats.deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used - #else - virtual double scan_time () { return (double)( records+deleted )/20.0 + 10; } ///< called in test_quick_select to determine if indexes should be used - #endif - - virtual double read_time(uint index, uint ranges, ha_rows rows) - { return ranges + (double)rows/20.0 + 1; } ///< index read time estimate + IO_AND_CPU_COST scan_time () + { + IO_AND_CPU_COST cost; + cost.io= 0; + cost.cpu= (double) (stats.records+stats.deleted) * avg_io_cost(); + return cost; + } + IO_AND_CPU_COST keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) + { + IO_AND_CPU_COST cost; + cost.io= ranges; + cost.cpu= 0; + return cost; + } + IO_AND_CPU_COST rnd_pos_time(ha_rows rows) + { + IO_AND_CPU_COST cost; + cost.io= 0; + cost.cpu= 0; + return cost; + } public: int open ( const char * name, int mode, uint test_if_locked ); diff --git a/storage/spider/ha_spider.cc b/storage/spider/ha_spider.cc index b8d33e38c4e..d31f48e4c09 100644 --- a/storage/spider/ha_spider.cc +++ b/storage/spider/ha_spider.cc @@ -8508,38 +8508,47 @@ int ha_spider::truncate() DBUG_RETURN(0); } - -double ha_spider::scan_time() +IO_AND_CPU_COST ha_spider::scan_time() { + IO_AND_CPU_COST cost; DBUG_ENTER("ha_spider::scan_time"); DBUG_PRINT("info",("spider this=%p", this)); - DBUG_PRINT("info",("spider scan_time = %.6f", - share->scan_rate * share->stat.records * share->stat.mean_rec_length + 2)); - DBUG_RETURN(share->scan_rate * share->stat.records * - share->stat.mean_rec_length + 2); + cost.io=0; + cost.cpu= 
(DISK_READ_COST * share->stat.records * share->stat.mean_rec_length); + DBUG_PRINT("info",("spider scan_time = %.6f", cost.cpu)); + DBUG_RETURN(cost); } -double ha_spider::read_time( - uint index, - uint ranges, - ha_rows rows -) { - DBUG_ENTER("ha_spider::read_time"); +IO_AND_CPU_COST ha_spider::rnd_pos_time(ha_rows rows) +{ + IO_AND_CPU_COST cost= { 0.0, 0.0}; // Row is in memory + return cost; +} + +IO_AND_CPU_COST ha_spider::keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks) +{ + IO_AND_CPU_COST cost; + DBUG_ENTER("ha_spider::keyread_time"); DBUG_PRINT("info",("spider this=%p", this)); + + /* + Here we only calculate transfer costs. The normal handler cost functions + will add costs for accessing a row/key. + */ if (wide_handler->keyread) { - DBUG_PRINT("info",("spider read_time(keyread) = %.6f", - share->read_rate * table->key_info[index].key_length * - rows / 2 + 2)); - DBUG_RETURN(share->read_rate * table->key_info[index].key_length * - rows / 2 + 2); + cost.io= 0; + cost.cpu= DISK_READ_COST * rows * table->key_info[index].key_length; } else { - DBUG_PRINT("info",("spider read_time = %.6f", - share->read_rate * share->stat.mean_rec_length * rows + 2)); - DBUG_RETURN(share->read_rate * share->stat.mean_rec_length * rows + 2); + cost.io= 0; + cost.cpu= DISK_READ_COST * rows * share->stat.mean_rec_length; } + DBUG_PRINT("info",("spider scan_time(keyread) = %.6f", cost.cpu)); + DBUG_RETURN(cost); } + const key_map *ha_spider::keys_to_use_for_scanning() { DBUG_ENTER("ha_spider::keys_to_use_for_scanning"); diff --git a/storage/spider/ha_spider.h b/storage/spider/ha_spider.h index 4dffdf78553..1c5c867b2f8 100644 --- a/storage/spider/ha_spider.h +++ b/storage/spider/ha_spider.h @@ -445,12 +445,10 @@ public: ); int delete_all_rows(); int truncate(); - double scan_time(); - double read_time( - uint index, - uint ranges, - ha_rows rows - ); + IO_AND_CPU_COST scan_time(); + IO_AND_CPU_COST rnd_pos_time(ha_rows rows); + IO_AND_CPU_COST 
keyread_time(uint index, ulong ranges, ha_rows rows, + ulonglong blocks); const key_map *keys_to_use_for_scanning(); ha_rows estimate_rows_upper_bound(); void print_error( diff --git a/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test b/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test index 60c0ad42921..02a4b803a89 100644 --- a/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test +++ b/storage/spider/mysql-test/spider/bugfix/t/mdev_27172.test @@ -2,6 +2,10 @@ --echo # MDEV-27172 Prefix indices on Spider tables may lead to wrong query results --echo # +# Disable test for ps-protocol as the general log has different number of +# commands for --ps +--source include/no_protocol.inc + --disable_query_log --disable_result_log --source ../../t/test_init.inc diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index 03624d475dc..cd158c6102d 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -6520,6 +6520,25 @@ int spider_panic( DBUG_RETURN(0); } +static void spider_update_optimizer_costs(OPTIMIZER_COSTS *costs) +{ + /* Assume 1 Gigabyte network */ + costs->disk_read_cost= IO_SIZE/(1000000000/8)*1000.00000; + costs->index_block_copy_cost= 0; // Not used + + /* + The following costs are copied from ha_innodb.cc + The assumption is that the default storage engine used with Spider is + InnoDB. 
+ */ + costs->row_next_find_cost= 0.00007013; + costs->row_lookup_cost= 0.00076597; + costs->key_next_find_cost= 0.00009900; + costs->key_lookup_cost= 0.00079112; + costs->row_copy_cost= 0.00006087; +} + + int spider_db_init( void *p ) { @@ -6563,6 +6582,7 @@ int spider_db_init( spider_hton->show_status = spider_show_status; spider_hton->create_group_by = spider_create_group_by_handler; spider_hton->table_options= spider_table_option_list; + spider_hton->update_optimizer_costs= spider_update_optimizer_costs; if (my_gethwaddr((uchar *) addr)) { diff --git a/tests/check_costs.pl b/tests/check_costs.pl new file mode 100755 index 00000000000..0e3b538b65b --- /dev/null +++ b/tests/check_costs.pl @@ -0,0 +1,1005 @@ +#!/usr/bin/env perl + +# Copyright (C) 2022 MariaDB Foundation +# Use is subject to license terms +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +# This is a test that runs queries to measure if the MariaDB cost calculations +# are reasonable. +# +# The following tests are run: +# - Full table scan of a table +# - Range scan of the table +# - Index scan of the table +# +# The output can be used to fine-tune the optimizer cost variables. +# +# The table in question is similar to the 'lineitem' table used by DBT3 +# it has 16 fields and could be regarded as an 'average kind of table'.
+# Number of fields and record length places a small role when comparing +# index scan and table scan + +##################### Standard benchmark inits ############################## + +use DBI; +use Getopt::Long; +use Benchmark ':hireswallclock'; + +package main; + +$opt_rows=1000000; +$opt_test_runs= 2; # Run each test 2 times and take the average +$opt_verbose=""; +$opt_host=""; +$opt_db="test"; +$opt_user="test"; +$opt_password=""; +$opt_socket=undef; +$opt_skip_drop= undef; +$opt_skip_create= undef; +$opt_init_query= undef; +$opt_print_analyze= undef; +$opt_where_check= undef; +$opt_engine=undef; +$opt_comment=undef; +$opt_table_suffix=undef; +$opt_table_name= undef; +$opt_grof= undef; +$opt_all_tests=undef; +$opt_ratios= undef; +$opt_mysql= undef; +$has_force_index=1; + +@arguments= @ARGV; + +GetOptions("host=s","user=s","password=s", "rows=i","test-runs=i","socket=s", + "db=s", "table-name=s", "skip-drop","skip-create", + "init-query=s","engine=s","comment=s", + "gprof", "one-test=s", + "mysql", "all-tests", "ratios", "where-check", + "print-analyze", "verbose") || + die "Aborted"; + +$Mysql::db_errstr=undef; # Ignore warnings from these + +my ($base_table, $table, $dbh, $where_cost, $real_where_cost, $perf_ratio); + +if (!$opt_mysql) +{ + @engines= ("aria","innodb","myisam","heap"); +} +else +{ + @engines= ("innodb","myisam","heap"); +} + +# Special handling for some engines + +$no_force= 0; + +if (defined($opt_engine)) +{ + if (lc($engine) eq "archive") + { + $has_force_index= 0; # Skip tests with force index + } +} + + +if (defined($opt_gprof) || defined($opt_one_test)) +{ + die "one_test must be defined when --gprof is used" + if (!defined($opt_one_test)); + die "engine must be defined when --gprof or --one-test is used" + if (!defined($opt_engine)); + die "function '$opt_one_test' does not exist\n" + if (!defined(&{$opt_one_test})); +} + +# We add engine_name to the table name later + +$opt_table_name="check_costs" if (!defined($opt_table_name)); 
+$base_table="$opt_db.$opt_table_name"; + +#### +#### Start timeing and start test +#### + +$|= 1; # Autoflush +if ($opt_verbose) +{ + $opt_print_analyze= 1; +} + +#### +#### Create the table +#### + +my %attrib; + +$attrib{'PrintError'}=0; + +if (defined($opt_socket)) +{ + $attrib{'mariadb_socket'}=$opt_socket; +} + +$dbh = DBI->connect("DBI:MariaDB:$opt_db:$opt_host", + $opt_user, $opt_password,\%attrib) || die $DBI::errstr; + +print_mariadb_version(); +print "Server options: $opt_comment\n" if (defined($opt_comment)); +print "Running tests with $opt_rows rows\n"; + +print "Program arguments:\n"; +for ($i= 0 ; $i <= $#arguments; $i++) +{ + my $arg=$arguments[$i]; + if ($arg =~ / /) + { + if ($arg =~ /([^ =]*)=(.*)/) + { + print "$1=\"$2\" "; + } + else + { + print "\"$arg\"" . " "; + } + } + else + { + print $arguments[$i] . " "; + } +} +print "\n\n"; + +@test_names= + ("table scan no where", "table scan simple where", + "table scan where no match", "table scan complex where", "table scan", + "index scan", "index scan 4 parts", "range scan", "eq_ref_index_join", + "eq_ref_cluster_join", "eq_ref_join", "eq_ref_btree"); +$where_tests=3; # Number of where test to be compared with test[0] + +if ($opt_mysql) +{ + create_seq_table(); +} + + +if ($opt_engine || defined($opt_one_test)) +{ + test_engine(0, $opt_engine); +} +else +{ + my $i; + undef($opt_skip_create); + for ($i= 0 ; $i <= $#engines; $i++) + { + test_engine($i, $engines[$i]); + + if ($i > 0 && $opt_ratios) + { + print "\n"; + my $j; + + print "Ratios $engines[$i] / $engines[0]\n"; + for ($j= $where_tests+1 ; $j <= $#test_names ; $j++) + { + if ($res[$i][$j]) + { + my $cmp_cost= $res[0][$j]->{'cost'} - $res[0][$j]->{'where_cost'}; + my $cmp_time= $res[0][$j]->{'time'}; + my $cur_cost= $res[$i][$j]->{'cost'} - $res[$i][$j]->{'where_cost'}; + my $cur_time= $res[$i][$j]->{'time'}; + + printf "%14.14s cost: %6.4f time: %6.4f cost_multiplier: %6.4f\n", + $test_names[$j], + $cur_cost / $cmp_cost, + $cur_time / 
$cmp_time, + ($cmp_cost * ($cur_time / $cmp_time))/$cur_cost; + } +000000 } + } +# if ($i + 1 <= $#engines) + { + print "-------------------------\n\n"; + } + } + print_totals(); +} + +$dbh->do("drop table if exists $table") if (!defined($opt_skip_drop)); +$dbh->disconnect; $dbh=0; # Close handler +exit(0); + + +sub test_engine() +{ + my ($i, $engine)= @_; + my ($cur_rows); + + setup($opt_init_query); + setup_engine($engine); + $table= $base_table . "_$engine"; + if (!defined($opt_skip_create)) + { + my $index_type=""; + + # We should use btree index with heap to ge range scans + $index_type= "using btree" if (lc($engine) eq "heap"); + + print "Creating table $table of type $engine\n"; + $dbh->do("drop table if exists $table"); + $dbh->do("create table $table ( + `l_orderkey` int(11) NOT NULL, + `l_partkey` int(11) DEFAULT NULL, + `l_suppkey` int(11) DEFAULT NULL, + `l_linenumber` int(11) NOT NULL, + `l_extra` int(11) NOT NULL, + `l_quantity` double DEFAULT NULL, + `l_extendedprice` double DEFAULT NULL, + `l_discount` double DEFAULT NULL, + `l_tax` double DEFAULT NULL, + `l_returnflag` char(1) DEFAULT NULL, + `l_linestatus` char(1) DEFAULT NULL, + `l_shipDATE` date DEFAULT NULL, + `l_commitDATE` date DEFAULT NULL, + `l_receiptDATE` date DEFAULT NULL, + `l_shipinstruct` char(25) DEFAULT NULL, + `l_shipmode` char(10) DEFAULT NULL, + `l_comment` varchar(44) DEFAULT NULL, + PRIMARY KEY (`l_orderkey`), + UNIQUE (`l_linenumber`), + UNIQUE (`l_extra`) $index_type, + KEY `l_suppkey` $index_type (l_suppkey, l_partkey), + KEY `long_suppkey` $index_type + (l_partkey, l_suppkey, l_linenumber, l_extra) ) + ENGINE= $engine") + or die "Got error on CREATE TABLE: $DBI::errstr"; + } + $cur_rows= get_row_count(); + if ($cur_rows == 0 || !defined($opt_skip_create)) + { + $dbh->do("insert into $table select + seq, seq/10, seq, seq, seq, seq, seq, mod(seq,10)*10, + 0, 'a','b', + date_add('2000-01-01', interval seq/500 day), + date_add('2000-01-10', interval seq/500 day), + 
date_add('2000-01-20', interval seq/500 day), + left(md5(seq),25), + if(seq & 1,'mail','ship'), + repeat('a',mod(seq,40)) + from seq_1_to_$opt_rows") + or die "Got error on INSERT: $DBI::errstr"; + + $sth= $dbh->do("analyze table $table") + or die "Got error on 'analyze table: " . $dbh->errstr . "\n"; + } + else + { + $opt_rows= $cur_rows; + die "Table $table is empty. Please run without --skip-create" + if ($opt_rows == 0); + print "Reusing old table $table, which has $opt_rows rows\n"; + } + + if (!$opt_mysql) + { + $where_cost=get_variable("optimizer_where_cost"); + if (defined($where_cost)) + { + # Calculate cost of where once. Must be done after table is created + $real_where_cost= get_where_cost(); + $perf_ratio= $real_where_cost/$where_cost; + printf "Performance ratio compared to base computer: %6.4f\n", + $perf_ratio; + } + print "\n"; + } + else + { + $where_cost=0.1; # mysql 'm_row_evaluate_cost' + } + + + if (defined($opt_one_test)) + { + if (defined($opt_gprof)) + { + # Argument is the name of the test function + test_with_gprof($opt_one_test, 10); + return; + } + $opt_one_test->(); + return; + } + + if ($opt_where_check) + { + $res[$i][0]= table_scan_without_where(0); + $res[$i][1]= table_scan_with_where(1); + $res[$i][2]= table_scan_with_where_no_match(2); + $res[$i][3]= table_scan_with_complex_where(3); + } + $res[$i][4]= table_scan_without_where_analyze(4); + $res[$i][5]= index_scan(5); + $res[$i][6]= index_scan_4_parts(6) if ($opt_all_tests); + $res[$i][7]= range_scan(7); + $res[$i][8]= eq_ref_index_join(8); + $res[$i][9]= eq_ref_clustered_join(9); + $res[$i][10]= eq_ref_join(10); + $res[$i][11]= eq_ref_join_btree(11); + + if ($opt_where_check) + { + printf "Variable optimizer_where_cost: cur: %6.4f real: %6.4f prop: %6.4f\n", + $where_cost, $real_where_cost, $perf_ratio; + print "Ratio of WHERE costs compared to scan without a WHERE\n"; + for ($j= 1 ; $j <= $where_tests ; $j++) + { + print_where_costs($i,$j,0); + } + print "\n"; + } + + print 
"Cost/time ratio for different scans types\n"; + for ($j= $where_tests+1 ; $j <= $#test_names ; $j++) + { + if ($res[$i][$j]) + { + print_costs($test_names[$j], $res[$i][$j]); + } + } +} + + +sub print_costs($;$) +{ + my ($name, $cur_res)= @_; + + # Cost without where clause + my $cur_cost= $cur_res->{'cost'} - $cur_res->{'where_cost'}; + my $cur_time= $cur_res->{'time'}; + + printf "%-20.20s cost: %9.4f time: %9.4f cost/time: %8.4f\n", + $name, + $cur_cost, $cur_time, $cur_cost/$cur_time; +} + +sub print_where_costs() +{ + my ($index, $cmp, $base)= @_; + + my $cmp_time= $res[$index][$cmp]->{'time'}; + my $base_time= $res[$index][$base]->{'time'}; + + printf "%-30.30s time: %6.4f\n", $test_names[$cmp], $cmp_time / $base_time; +} + + +# Used to set up things like optimizer_switch or optimizer_cache_hit_ratio + +sub setup() +{ + my ($query)= @_; + my ($sth,$query); + + $sth= $dbh->do("flush tables") || + die "Got error on 'flush tables': " . $dbh->errstr . "\n"; + if (defined($query)) + { + $sth= $dbh->do("$query") || + die "Got error on '$query': " . $dbh->errstr . "\n"; + } + + # Set variables that may interfere with timings + $query= "set \@\@optimizer_switch='index_condition_pushdown=off'"; + $sth= $dbh->do($query) || + die "Got error on '$query': " . $dbh->errstr . "\n"; +} + + +sub setup_engine() +{ + my ($engine)= @_; + my ($sth,$query); + + if (!$opt_mysql) + { + # Set variables that may interfere with timings + $query= "set global $engine.optimizer_disk_read_ratio=0"; + $sth= $dbh->do($query) || + die "Got error on '$query': " . $dbh->errstr . "\n"; + } +} + +sub create_seq_table +{ + my $name= "seq_1_to_$opt_rows"; + my $i; + print "Creating $name\n"; + $dbh->do("drop table if exists $name") || + die "Error on drop: " . $dbh->errstr ."\n"; + $dbh->do("create table $name (seq int(11) not null) engine=heap") + || die "Error on create: " .
$dbh->errstr ."\n"; + for ($i= 1 ; $i < $opt_rows ; $i+=10) + { + $dbh->do("insert into $name values + ($i),($i+1),($i+2),($i+3),($i+4),($i+5),($i+6),($i+7),($i+8),($i+9)") || die "Error on insert"; + } +} + + + +############################################################################## +# Query functions +############################################################################## + +# Calculate the cost of the WHERE clause + +sub table_scan_without_where() +{ + my ($query_id)= @_; + return run_query($test_names[$query_id], + "table_scan", "ALL", $opt_rows, +"select sum(l_quantity) from $table"); +} + +sub table_scan_with_where() +{ + my ($query_id)= @_; + return run_query($test_names[$query_id], + "table_scan", "ALL", $opt_rows, +"select sum(l_quantity) from $table where l_commitDate >= '2000-01-01' and l_tax >= 0.0"); +} + +sub table_scan_with_where_no_match() +{ + my ($query_id)= @_; + return run_query($test_names[$query_id], + "table_scan", "ALL", $opt_rows, +"select sum(l_quantity) from $table where l_commitDate >= '2000-01-01' and l_tax > 0.0 /* NO MATCH */"); +} + + +sub table_scan_with_complex_where() +{ + my ($query_id)= @_; + return run_query($test_names[$query_id], + "table_scan", "ALL", $opt_rows, +"select sum(l_quantity) from $table where l_commitDate >= '2000-01-01' and l_quantity*l_extendedprice-l_discount+l_tax > 0.0"); +} + +# Calculate the time spent for table accesses (done with analyze statment) + +# Table scan + +sub table_scan_without_where_analyze() +{ + my ($query_id)= @_; + return run_query_with_analyze($test_names[$query_id], + "table_scan", "ALL", $opt_rows, +"select sum(l_quantity) from $table"); +} + +# Index scan with 2 key parts + +sub index_scan() +{ + my ($query_id)= @_; + return 0 if (!$has_force_index); + my ($query_id)= @_; + return run_query_with_analyze($test_names[$query_id], + "index_scan", "index", $opt_rows, +"select count(*) from $table force index (l_suppkey) where l_suppkey >= 0 and l_partkey >=0"); +} + +# Index 
scan with 4 key parts +# This is to check how the number of key parts affects the timings + +sub index_scan_4_parts() +{ + my ($query_id)= @_; + return 0 if (!$has_force_index); + return run_query_with_analyze($test_names[$query_id], + "index_scan_4_parts", "index", $opt_rows, +"select count(*) from $table force index (long_suppkey) where l_linenumber >= 0 and l_extra >0"); +} + +sub range_scan() +{ + my ($query_id)= @_; + return 0 if (!$has_force_index); + return run_query_with_analyze($test_names[$query_id], + "range_scan", "range", $opt_rows, +"select sum(l_orderkey) from $table force index(l_suppkey) where l_suppkey >= 0 and l_partkey >=0 and l_discount>=0.0"); +} + +sub eq_ref_index_join() +{ + my ($query_id)= @_; + return run_query_with_analyze($test_names[$query_id], + "eq_ref_index_join", "eq_ref", 1, +"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_linenumber"); +} + +sub eq_ref_clustered_join() +{ + my ($query_id)= @_; + return run_query_with_analyze($test_names[$query_id], + "eq_ref_cluster_join", "eq_ref", 1, +"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_orderkey"); +} + +sub eq_ref_join() +{ + my ($query_id)= @_; + return run_query_with_analyze($test_names[$query_id], + "eq_ref_join", "eq_ref", 1, +"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_linenumber and l_partkey >= 0"); +} + +sub eq_ref_join_btree() +{ + my ($query_id)= @_; + return run_query_with_analyze($test_names[$query_id], + "eq_ref_btree", "eq_ref", 1, +"select straight_join count(*) from seq_1_to_$opt_rows,$table where seq=l_extra and l_partkey >= 0"); +} + + +# Calculate the cost of a basic where clause
# This can be used to find out the speed of the current computer compared
# to the reference computer on which the costs were calibrated.
+ +sub get_where_cost() +{ + my ($loop); + $loop=10000000; + # Return time in microseconds for one where (= optimizer_where_cost) + return query_time("select benchmark($loop, l_commitDate >= '2000-01-01' and l_tax >= 0.0) from $table limit 1")/$loop; +} + + +# Run a query to be able to calculate the costs of filter + +sub cost_of_filtering() +{ + my ($query, $cost1, $cost2); + do_query("set \@\@max_rowid_filter_size=10000000," . + "optimizer_switch='rowid_filter=on',". + "\@\@optimizer_scan_setup_cost=1000000"); + do_query("set \@old_cost=\@\@aria.OPTIMIZER_ROW_LOOKUP_COST"); + do_query("set global aria.OPTIMIZER_ROW_LOOKUP_COST=1"); + do_query("flush tables"); + $cost1= run_query_with_analyze("range", "range", "range", 500000, + "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000"); + $cost2= run_query_with_analyze("range-all", "range-all", "range|filter", 500000, + "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000 and l_linenumber between 1 and 500000"); + $cost3= run_query_with_analyze("range-none","range-none", "range|filter", 500000, + "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000 and l_linenumber between 500000 and 1000000"); + do_query("set global aria.OPTIMIZER_ROW_LOOKUP_COST=\@old_cost"); + do_query("flush tables"); + print_costs("range", $cost1); + print_costs("filter-all", $cost2); + print_costs("filter-none", $cost3); +} + +sub gprof_cost_of_filtering() +{ + $cost2= run_query_with_analyze("gprof","range-all", "range|filter", 500000, + "select count(l_discount) from check_costs_aria as t1 where t1.l_orderkey between 1 and 500000 and l_linenumber between 1 and 500000"); +} + + +############################################################################### +# Help functions for running the queries +############################################################################### + + +# Run query and return time for query 
in microseconds

#
# query_time($query)
# Run a single query (prepare + execute) and return the elapsed wall-clock
# time in microseconds, measured with the core Benchmark module.
# NOTE(review): the end timestamp is taken before fetchrow_arrayref(), so
# fetching the (single) result row is not included in the measured time.
#
sub query_time()
{
  my ($query)= @_;
  my ($start_time,$end_time,$time,$ms,$sth,$row);

  $start_time= new Benchmark;
  $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
  $end_time=new Benchmark;
  $row= $sth->fetchrow_arrayref();
  $sth=0;                               # Drop statement handle

  # timestr() output starts with the elapsed (wallclock) seconds; grab the
  # leading number and convert seconds -> microseconds.
  $time= timestr(timediff($end_time, $start_time),"nop");
  $time =~ /([\d.]*)/;
  return $1*1000000.0;
}

#
# Run a query and compare the clock time
#
# run_query($full_name, $name, $type, $expected_rows, $query)
# 1) EXPLAINs the query and warns if the access type or the optimizer row
#    estimate (off by more than 10%) differs from what the test expects;
#    a wrong row estimate scales the reported cost by
#    expected_rows/optimizer_rows so costs stay comparable.
# 2) Runs the query once to warm the caches, then once more timed.
# 3) Reads 'last_query_cost' from SHOW STATUS.
# Returns a hashref with 'time' (ms) and 'cost' (adjusted optimizer cost).
# NOTE(review): $row, $time and $cost are not declared with my() here and
# act as file-level globals; $row in the my() list is the array @row.
#

sub run_query()
{
  my ($full_name, $name, $type, $expected_rows, $query)= @_;
  my ($start_time,$end_time,$sth,@row,%res,$i,$optimizer_rows);
  my ($extra, $last_type, $adjust_cost, $ms);
  $adjust_cost=1.0;

  print "Timing full query: $full_name\n$query\n";

  $sth= $dbh->prepare("explain $query") || die "Got error on 'explain $query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on 'explain $query': " . $dbh->errstr . "\n";

  # Print the EXPLAIN output and remember type/rows from its last row
  print "explain:\n";
  while ($row= $sth->fetchrow_arrayref())
  {
    print $row->[0];
    for ($i= 1 ; $i < @$row; $i++)
    {
      print " " . $row->[$i] if (defined($row->[$i]));
    }
    print "\n";

    $extra= $row->[@$row-1];            # 'Extra' column (last)
    $last_type= $row->[3];              # access type column
    $optimizer_rows= $row->[8];         # estimated rows column
  }
  # "index" is also accepted when EXPLAIN says "Using index" in Extra
  if ($last_type ne $type &&
      ($type ne "index" || !($extra =~ /Using index/)))
  {
    print "Warning: Wrong scan type: '$last_type', expected '$type'\n";
  }

  if ($expected_rows >= 0 &&
      (abs($optimizer_rows - $expected_rows)/$expected_rows) > 0.1)
  {
    printf "Warning: Expected $expected_rows instead of $optimizer_rows from EXPLAIN. Adjusting costs\n";
    $adjust_cost= $expected_rows / $optimizer_rows;
  }

  # Do one query to fill the cache
  $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
  $end_time=new Benchmark;
  $row= $sth->fetchrow_arrayref();
  $sth=0;

  # Run query for real
  $start_time= new Benchmark;
  $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
  $end_time=new Benchmark;
  $row= $sth->fetchrow_arrayref();
  $sth=0;

  # Convert Benchmark wallclock seconds to milliseconds
  $time= timestr(timediff($end_time, $start_time),"nop");
  $time =~ /([\d.]*)/;
  $ms= $1*1000.0;

  # Fetch the optimizer's cost for the query we just ran
  $query= "show status like 'last_query_cost'";
  $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";;
  $row= $sth->fetchrow_arrayref();
  $sth=0;
  $cost= $row->[1] * $adjust_cost;
  printf "%10s time: %10.10s ms cost: %6.4f", $name, $ms, $cost;
  if ($adjust_cost != 1.0)
  {
    printf " (was %6.4f)", $row->[1];
  }
  print "\n\n";

  $res{'cost'}= $cost;
  $res{'time'}= $ms;
  return \%res;
}

#
# Run a query and compare the table access time from analyze statement
# The cost works for queries with one or two tables!
#
# run_query_with_analyze($full_name, $name, $type, $expected_rows, $query)
# Like run_query(), but measures only table access time by parsing the
# r_table_time_ms values out of ANALYZE FORMAT=JSON (MariaDB) or the
# "actual time=..N loops=L" output of EXPLAIN ANALYZE (MySQL, --mysql mode),
# averaged over $opt_test_runs runs.  The estimated cost of evaluating the
# WHERE clause ($where_cost per row) is computed so it can be subtracted
# from the optimizer cost, since ANALYZE row times exclude the WHERE.
# Returns a hashref with 'time' (ms), 'cost' and 'where_cost'.
# NOTE(review): in --grof mode (sic, presumably gprof) cache warm-up and
# cost fetching are skipped so profiling only sees the measured queries.
#

sub run_query_with_analyze()
{
  my ($full_name,$name, $type, $expected_rows, $query)= @_;
  my ($start_time,$end_time,$sth,@row,%res,$i,$j);
  my ($optimizer_rows, $optimizer_rows_first);
  my ($adjust_cost, $ms, $second_ms, $analyze, $local_where_cost);
  my ($extra, $last_type, $tot_ms, $found_two_tables);

  $found_two_tables= 0;
  $adjust_cost=1.0;
  # $where_cost is in ms/row for MariaDB but s/row in --mysql mode;
  # normalize to the total WHERE cost for $opt_rows rows.
  if (!$opt_mysql)
  {
    $local_where_cost= $where_cost/1000 * $opt_rows;
  }
  else
  {
    $local_where_cost= $where_cost * $opt_rows;
  }
  $optimizer_rows_first= undef;

  print "Timing table access for query: $full_name\n$query\n";

  $sth= $dbh->prepare("explain $query") || die "Got error on 'explain $query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on 'explain $query': " . $dbh->errstr . "\n";

  print "explain:\n";
  # EXPLAIN column layout differs between MariaDB and MySQL
  # NOTE(review): $type_pos/$row_pos are undeclared globals.
  if (!$opt_mysql)
  {
    $type_pos= 3;
    $row_pos= 8;
  }
  else
  {
    $type_pos= 4;
    $row_pos= 9;
  }

  # Print EXPLAIN; remember the row estimate of the first (driving) table
  # and the type/rows of the last table, counting tables in $j.
  $j= 0;
  while ($row= $sth->fetchrow_arrayref())
  {
    $j++;
    print $row->[0];
    for ($i= 1 ; $i < @$row; $i++)
    {
      print " " . $row->[$i] if (defined($row->[$i]));
      # print " X" if (!defined($row->[$i]));
    }
    print "\n";

    $extra= $row->[@$row-1];
    $last_type= $row->[$type_pos];
    if (!defined($optimizer_rows_first))
    {
      $optimizer_rows_first= $row->[$row_pos];
    }
    $optimizer_rows= $row->[$row_pos];
  }
  $found_two_tables= 1 if ($j > 1);

  # Same access-type / row-estimate sanity checks as in run_query()
  if ($last_type ne $type &&
      ($type ne "index" || !($extra =~ /Using index/)))
  {
    print "Warning: Wrong scan type: '$last_type', expected '$type'\n";
  }
  if ($expected_rows >= 0 &&
      (abs($optimizer_rows - $expected_rows)/$expected_rows) > 0.1)
  {
    printf "Warning: Expected $expected_rows instead of $optimizer_rows from EXPLAIN. Adjusting costs\n";
    $adjust_cost= $expected_rows / $optimizer_rows;
  }

  # Do one query to fill the cache
  if (!defined($opt_grof))
  {
    $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
    $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
    $row= $sth->fetchrow_arrayref();
    $sth=0;
  }

  # Run the query through analyze statement
  $tot_ms=0;
  if (!$opt_mysql)
  {
    # MariaDB: sum r_table_time_ms (and r_filling_time_ms) over all runs
    for ($i=0 ; $i < $opt_test_runs ; $i++)
    {
      my ($j);
      # NOTE(review): 'analzye' in the die message is a typo for 'analyze'
      $sth= $dbh->prepare("analyze format=json $query" ) || die "Got error on 'analzye $query': " . $dbh->errstr . "\n";
      $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
      $row= $sth->fetchrow_arrayref();
      $analyze= $row->[0];
      $sth=0;

      # Fetch the timings
      $j=0;
      while ($analyze =~ /r_table_time_ms": ([0-9.]*)/g)
      {
        $tot_ms= $tot_ms+ $1;
        $j++;
      }
      if ($j > 2)
      {
        die "Found too many tables, program needs to be extended!"
      }
      # Add cost of filtering
      while ($analyze =~ /r_filling_time_ms": ([0-9.]*)/g)
      {
        $tot_ms= $tot_ms+ $1;
      }
    }
  }
  else
  {
    # MySQL: parse "actual time=a..b ... loops=N" for our table from the
    # text output of EXPLAIN ANALYZE; only the last run is parsed.
    my $local_table= substr($table,index($table,".")+1);
    for ($i=0 ; $i < $opt_test_runs ; $i++)
    {
      my ($j);
      $sth= $dbh->prepare("explain analyze $query" ) || die "Got error on 'analzye $query': " . $dbh->errstr . "\n";
      $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";
      $row= $sth->fetchrow_arrayref();
      $analyze= $row->[0];
      $sth=0;
    }
    # Fetch the timings
    $j=0;

    if ($analyze =~ / $local_table .*actual time=([0-9.]*) .*loops=([0-9]*)/g)
    {
      my $times= $1;
      my $loops= $2;
      # Take the end time from the "start..end" pair
      $times =~ /\.\.([0-9.]*)/;
      $times= $1;
      # Avoid a zero measurement; use 5 microseconds as a floor
      $times="0.005" if ($times == 0);
      #print "time: $times \$1: $1 loops: $loops\n";
      $tot_ms= $tot_ms+ $times*$loops;
      $j++;
    }
    if ($j > 1)
    {
      die "Found too many tables, program needs to be extended!"
    }
  }


  if ($found_two_tables)
  {
    # Add the cost of the where for the two tables. The last table
    # is assumed to have $expected_rows while the first (driving table)
    # may have less rows. Take that into account when calculating the
    # total where cost.
    $local_where_cost= ($local_where_cost +
                        $local_where_cost *
                        ($optimizer_rows_first/$opt_rows));
  }
  # Average table-access time over the test runs
  $ms= $tot_ms/$opt_test_runs;

  if ($opt_print_analyze)
  {
    print "\nanalyze:\n" . $analyze . "\n\n";
  }

  if (!defined($opt_grof))
  {
    # Get last query cost
    $query= "show status like 'last_query_cost'";
    $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
    $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";;
    $row= $sth->fetchrow_arrayref();
    $sth=0;
    $cost= $row->[1] * $adjust_cost;

    printf "%10s time: %10.10s ms cost-where: %6.4f cost: %6.4f",
      $name, $ms, $cost - $local_where_cost, $cost;
    if ($adjust_cost != 1.0)
    {
      printf " (cost was %6.4f)", $row->[1];
    }
  }
  else
  {
    printf "%10s time: %10.10s ms", $name, $ms;
    $cost= 0; $local_where_cost= 0;
  }
  print "\n\n";

  $res{'cost'}= $cost;
  $res{'where_cost'}= $local_where_cost;
  $res{'time'}= $ms;
  return \%res;
}


# Execute a statement for its side effects only; dies on any error.
sub do_query()
{
  my ($query)= @_;
  $dbh->do($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
}


# Print, per test (skipping the first $where_tests WHERE-calibration
# entries), each engine's time, cost excluding the WHERE cost, and the
# cost/time ratio, from the global @res matrix filled in by the test runs.
sub print_totals()
{
  my ($i, $j);
  print "Totals per test\n";
  for ($j= $where_tests+1 ; $j <= $#test_names; $j++)
  {
    print "$test_names[$j]:\n";
    # BUG(review): '$i= $0' uses $0 (the program name, which numifies to 0
    # with a warning); almost certainly meant '$i= 0'.
    for ($i= $0 ; $i <= $#engines ; $i++)
    {
      if ($res[$i][$j])
      {
        my $cost= $res[$i][$j]->{'cost'} - $res[$i][$j]->{'where_cost'};
        my $ms= $res[$i][$j]->{'time'};
        printf "%-8s %10.4f ms cost: %10.4f cost/time: %8.4f\n",
          $engines[$i], $ms, $cost, $cost/$ms;
      }
    }
  }
}


# This function can be used to test things with gprof
# Runs $function_ref repeatedly, prints the average 'time' it reports and
# then shuts the server down so gprof output gets written.
# NOTE(review): '$loops=10;' overrides the $loops argument, so the second
# parameter is currently ignored; 'Shuting' in the message is a typo.
sub test_with_gprof()
{
  my ($function_ref, $loops)= @_;
  my ($sum, $i, $cost);

  printf "Running test $function_ref $loops time\n";
  $sum= 0; $loops=10;
  for ($i=0 ; $i < $loops ; $i++)
  {
    $cost= $function_ref->();
    $sum+= $cost->{'time'};
  }
  print "Average: " . ($sum/$loops) . "\n";
  print "Shuting down server\n";
  $dbh->do("shutdown") || die "Got error ..";
}

##############################################################################
# Get various simple data from MariaDB
##############################################################################

# Print the server version string and its source-revision commit hash.
sub print_mariadb_version()
{
  my ($query, $sth, $row);
  $query= "select VERSION()";
  $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";;
  $row= $sth->fetchrow_arrayref();
  print "Server: $row->[0]";

  $query= "show variables like 'VERSION_SOURCE_REVISION'";
  $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
  $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";;
  $row= $sth->fetchrow_arrayref();
  print " Commit: $row->[1]\n";
}


# Return the row count of the global test table $table, or 0 if the table
# does not exist yet (any other execute error is fatal).
# NOTE(review): $query/$sth/$row are undeclared globals in this sub.
sub get_row_count()
{
  $query= "select count(*) from $table";
  $sth= $dbh->prepare($query) || die "Got error on '$query': " . $dbh->errstr . "\n";
  if (!$sth->execute)
  {
    # Tolerate "table doesn't exist"; the caller will create the table
    if (!($dbh->errstr =~ /doesn.*exist/))
    {
      die "Got error on '$query': " . $dbh->errstr . "\n";
    }
    return 0;
  }
  $row= $sth->fetchrow_arrayref();
  return $row->[0];
}


# Return the value of the server system variable $name (via SELECT @@name).
# NOTE(review): $query/$sth/$row are undeclared globals in this sub.
sub get_variable()
{
  my ($name)= @_;
  $query= "select @@" . $name;
  if (!($sth= $dbh->prepare($query)))
  {
    die "Got error on '$query': " . $dbh->errstr . "\n";
  }
  $sth->execute || die "Got error on '$query': " . $dbh->errstr . "\n";;
  $row= $sth->fetchrow_arrayref();
  return $row->[0];
} |