From 099b9202a5c0496cc4f4024a6e667fd526664f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 3 Jun 2022 10:47:34 +0300 Subject: MDEV-27697 fixup: Exclude debug code from non-debug builds --- sql/slave.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/slave.cc b/sql/slave.cc index 749aad5d683..60a62d71cf6 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2017, Oracle and/or its affiliates. - Copyright (c) 2009, 2020, MariaDB Corporation + Copyright (c) 2009, 2022, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -6586,8 +6586,8 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) can be satisfied only with the strict mode that ensures against "genuine" gtid duplicates. */ - rpl_gtid *gtid_in_slave_state= - mi->gtid_current_pos.find(mi->last_queued_gtid.domain_id); + IF_DBUG(rpl_gtid *gtid_in_slave_state= + mi->gtid_current_pos.find(mi->last_queued_gtid.domain_id),); // Slave gtid state must not have updated yet to the last received gtid. DBUG_ASSERT((mi->using_gtid == Master_info::USE_GTID_NO || -- cgit v1.2.1 From 392e744aec9e52802e7317f8c7f9b52019026ab9 Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 4 May 2022 17:30:21 +0300 Subject: Fixed crashing when using DBUG_PUSH_EMPTY DBUG_PUSH_EMPTY is used by thr_mutex.cc. If there are 4G of DBUG_PUSH_EMPTY calls, then DBUG_POP_EMPTY will cause a crash when DBUGCloseFile() will try to free an object that was never allocated. 
--- dbug/dbug.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbug/dbug.c b/dbug/dbug.c index 011b932a721..169dd226419 100644 --- a/dbug/dbug.c +++ b/dbug/dbug.c @@ -1994,11 +1994,10 @@ static void DBUGOpenFile(CODE_STATE *cs, static void DBUGCloseFile(CODE_STATE *cs, sFILE *new_value) { sFILE *fp; - if (!cs || !cs->stack || !cs->stack->out_file) + if (!cs || !cs->stack || !(fp= cs->stack->out_file)) return; - fp= cs->stack->out_file; - if (--fp->used == 0) + if (fp != sstdout && fp != sstderr && --fp->used == 0) { if (fclose(fp->file) == EOF) { -- cgit v1.2.1 From 9e6fd2995b2276aa50b2433e475cfe46dfd2bc2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 7 Jun 2022 10:53:33 +0300 Subject: MDEV-25506 fixup: Wait for TRUNCATE recovery --- mysql-test/suite/innodb/r/truncate_crash.result | 4 ++++ mysql-test/suite/innodb/t/truncate_crash.test | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/mysql-test/suite/innodb/r/truncate_crash.result b/mysql-test/suite/innodb/r/truncate_crash.result index 0c20e76d331..ae3e6f6f4bf 100644 --- a/mysql-test/suite/innodb/r/truncate_crash.result +++ b/mysql-test/suite/innodb/r/truncate_crash.result @@ -8,6 +8,10 @@ connection default; SET DEBUG_SYNC='now WAIT_FOR c'; # restart disconnect wait; +SET @save_frequency=@@GLOBAL.innodb_purge_rseg_truncate_frequency; +SET GLOBAL innodb_purge_rseg_truncate_frequency=1; +InnoDB 0 transactions not purged +SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency; SELECT COUNT(*) FROM t1; COUNT(*) 0 diff --git a/mysql-test/suite/innodb/t/truncate_crash.test b/mysql-test/suite/innodb/t/truncate_crash.test index 5cb39c745dc..27b8feea8a4 100644 --- a/mysql-test/suite/innodb/t/truncate_crash.test +++ b/mysql-test/suite/innodb/t/truncate_crash.test @@ -17,7 +17,13 @@ SET DEBUG_SYNC='now WAIT_FOR c'; --source include/restart_mysqld.inc disconnect wait; +SET @save_frequency=@@GLOBAL.innodb_purge_rseg_truncate_frequency; +SET GLOBAL 
innodb_purge_rseg_truncate_frequency=1; +--source include/wait_all_purged.inc +SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency; + --replace_result 2 0 SELECT COUNT(*) FROM t1; TRUNCATE TABLE t1; DROP TABLE t1; + -- cgit v1.2.1 From 09177eadc39ae1e777ad473970456cb9dd9c3993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 7 Jun 2022 16:00:49 +0300 Subject: MDEV-25257 follow-up: full_crc32 format is garbage-free In any files that were created in the innodb_checksum_algorithm=full_crc32 format (commit c0f47a4a58424c621204dacb8016a94b66cb2bce) any unused data fields will have been zero-initialized (commit 3926673ce7149aa223103126b6aeac819b10fab5). --- storage/innobase/dict/dict0load.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index bfef508c613..a8972bab234 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1304,7 +1304,8 @@ dict_sys_tables_rec_read( high bit set in n_cols, and flags would be zero. MySQL 4.1 was the first version to support innodb_file_per_table, that is, *space_id != 0. 
*/ - if (not_redundant || *space_id != 0 || *n_cols & DICT_N_COLS_COMPACT) { + if (not_redundant || *space_id != 0 || *n_cols & DICT_N_COLS_COMPACT + || fil_system.sys_space->full_crc32()) { /* Get flags2 from SYS_TABLES.MIX_LEN */ field = rec_get_nth_field_old( -- cgit v1.2.1 From 19c721631ef21bcf3ce3ea3a036da5e234b0f49c Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 6 Jun 2022 22:21:22 +0300 Subject: MDEV-28749: restore_prev_nj_state() doesn't update cur_sj_inner_tables correctly (Try 2) (Cherry-pick back into 10.3) The code that updates semi-join optimization state for a join order prefix had several bugs. The visible effect was bad optimization for FirstMatch or LooseScan strategies: they either weren't considered when they should have been, or considered when they shouldn't have been. In order to hit the bug, the optimizer needs to consider several different join prefixes in a certain order. Queries with "obvious" query plans which prune all join orders except one are not affected. Internally, the bugs in updates of semi-join state were: 1. restore_prev_sj_state() assumed that "we assume remaining_tables doesnt contain @tab" which wasn't true. 2. Another bug in this function: it did remove bits from join->cur_sj_inner_tables but never added them. 3. greedy_search() adds tables into the join prefix but neglects to update the semi-join optimization state. (It does update nested outer join state, see this call: check_interleaving_with_nj(best_table) but there's no matching call to update the semi-join state. (This wasn't visible because most of the state is in the POSITION structure which is updated. But there is also state in JOIN, too) The patch: - Fixes all of the above - Adds JOIN::dbug_verify_sj_inner_tables() which is used to verify the state is correct at every step. - Renames advance_sj_state() to optimize_semi_joins(). 
- Introduces update_sj_state() which ideally should have been called "advance_sj_state" but I didn't reuse the name to not create confusion. --- mysql-test/main/subselect_sj.result | 6 +- mysql-test/main/subselect_sj_jcl6.result | 8 +-- sql/opt_subselect.cc | 103 +++++++++++++++++++++++++------ sql/opt_subselect.h | 8 ++- sql/sql_select.cc | 18 ++++-- sql/sql_select.h | 8 ++- 6 files changed, 118 insertions(+), 33 deletions(-) diff --git a/mysql-test/main/subselect_sj.result b/mysql-test/main/subselect_sj.result index 13b8a16080f..787ec226bab 100644 --- a/mysql-test/main/subselect_sj.result +++ b/mysql-test/main/subselect_sj.result @@ -2178,10 +2178,10 @@ INSERT INTO t5 VALUES (7,0),(9,0); explain SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary +1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan 1 PRIMARY t4 ALL NULL NULL NULL NULL 3 -1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where -1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (flat, BNL join) +1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5) +1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join) SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); a 0 diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result index 54196f62211..be2a55d65d6 100644 --- a/mysql-test/main/subselect_sj_jcl6.result +++ b/mysql-test/main/subselect_sj_jcl6.result @@ -2189,10 +2189,10 @@ INSERT INTO t5 VALUES (7,0),(9,0); explain SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary -1 PRIMARY t4 ALL NULL NULL 
NULL NULL 3 Using join buffer (flat, BNL join) -1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where; Using join buffer (incremental, BNL join) -1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (incremental, BNL join) +1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan +1 PRIMARY t4 ALL NULL NULL NULL NULL 3 +1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5) +1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join) SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); a 0 diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 6080717e5f5..6e89fe400d3 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -179,7 +179,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - optimize_semijoin_nests() does pre-optimization - during join optimization, the join has one JOIN_TAB (or is it POSITION?) - array, and suffix-based detection is used, see advance_sj_state() + array, and suffix-based detection is used, see optimize_semi_joins() - after join optimization is done, get_best_combination() switches the data-structure to prefix-based, multiple JOIN_TAB ranges format. 
@@ -2655,7 +2655,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) Do semi-join optimization step after we've added a new tab to join prefix SYNOPSIS - advance_sj_state() + optimize_semi_joins() join The join we're optimizing remaining_tables Tables not in the join prefix new_join_tab Join tab we've just added to the join prefix @@ -2715,9 +2715,9 @@ bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map in } -void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos) +void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, + double *current_read_time, POSITION *loose_scan_pos) { POSITION *pos= join->positions + idx; const JOIN_TAB *new_join_tab= pos->table; @@ -2876,19 +2876,36 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, } } - if ((emb_sj_nest= new_join_tab->emb_sj_nest)) + update_sj_state(join, new_join_tab, idx, remaining_tables); + + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; + pos->dups_producing_tables= dups_producing_tables; +} + + +/* + Update JOIN's semi-join optimization state after the join tab new_tab + has been added into the join prefix. 
+ + @seealso restore_prev_sj_state() does the reverse actoion +*/ + +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables) +{ + if (TABLE_LIST *emb_sj_nest= new_tab->emb_sj_nest) { join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ if (!(remaining_tables & - emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) + emb_sj_nest->sj_inner_tables & ~new_tab->table->map)) join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - - pos->prefix_cost.convert_from_cost(*current_read_time); - pos->prefix_record_count= *current_record_count; - pos->dups_producing_tables= dups_producing_tables; +#ifndef DBUG_OFF + join->dbug_verify_sj_inner_tables(idx + 1); +#endif } @@ -3402,10 +3419,45 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, return FALSE; } +#ifndef DBUG_OFF +/* + Verify the value of JOIN::cur_sj_inner_tables by recomputing it +*/ +void JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const +{ + table_map cur_map= const_table_map; + table_map nests_entered= 0; + if (emb_sjm_nest) + { + DBUG_ASSERT(cur_sj_inner_tables == 0); + return; + } + + for (uint i= const_tables; i < prefix_size; i++) + { + JOIN_TAB *tab= positions[i].table; + cur_map |= tab->table->map; + if (TABLE_LIST *sj_nest= tab->emb_sj_nest) + { + nests_entered |= sj_nest->sj_inner_tables; + if (!(sj_nest->sj_inner_tables & ~cur_map)) + { + // all nest tables are in the prefix already + nests_entered &= ~sj_nest->sj_inner_tables; + } + } + } + DBUG_ASSERT(nests_entered == cur_sj_inner_tables); +} +#endif /* Remove the last join tab from from join->cur_sj_inner_tables bitmap - we assume remaining_tables doesnt contain @tab. + + @note + remaining_tables contains @tab. 
+ + @seealso update_sj_state() does the reverse */ void restore_prev_sj_state(const table_map remaining_tables, @@ -3419,15 +3471,30 @@ void restore_prev_sj_state(const table_map remaining_tables, tab->join->sjm_lookup_tables &= ~subq_tables; } - if ((emb_sj_nest= tab->emb_sj_nest)) + if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest)) { + table_map subq_tables= emb_sj_nest->sj_inner_tables & + ~tab->join->const_table_map; /* If we're removing the last SJ-inner table, remove the sj-nest */ - if ((remaining_tables & emb_sj_nest->sj_inner_tables) == - (emb_sj_nest->sj_inner_tables & ~tab->table->map)) + if ((remaining_tables & subq_tables) == subq_tables) { + // All non-const tables of the SJ nest are in the remaining_tables. + // we are not in the nest anymore. tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } + else + { + // Semi-join nest has: + // - a table being removed (not in the prefix) + // - some tables in the prefix. + tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + } } + +#ifndef DBUG_OFF + /* positions[idx] has been removed. Verify the state for [0...idx-1] */ + tab->join->dbug_verify_sj_inner_tables(idx); +#endif } @@ -3636,8 +3703,8 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; join->best_positions[first].n_sj_tables= sjm->tables; /* - Do what advance_sj_state did: re-run best_access_path for every table - in the [last_inner_table + 1; pos..) range + Do what optimize_semi_joins did: re-run best_access_path for every + table in the [last_inner_table + 1; pos..) range */ double prefix_rec_count; /* Get the prefix record count */ @@ -4842,7 +4909,7 @@ int setup_semijoin_loosescan(JOIN *join) The choice between the strategies is made by the join optimizer (see - advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()). + optimize_semi_joins() and fix_semijoin_strategies_for_picked_join_order()). 
This function sets up all fields/structures/etc needed for execution except for setup/initialization of semi-join materialization which is done in setup_sj_materialization() (todo: can't we move that to here also?) diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 509fb370fd7..4744c49799a 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -310,9 +310,11 @@ public: }; -void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos); +void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, + double *current_read_time, POSITION *loose_scan_pos); +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables); void restore_prev_sj_state(const table_map remaining_tables, const JOIN_TAB *tab, uint idx); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ee834f5d806..87144bd2d7d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7721,6 +7721,10 @@ choose_plan(JOIN *join, table_map join_tables) { choose_initial_table_order(join); } + /* + Note: constant tables are already in the join prefix. We don't + put them into the cur_sj_inner_tables, though. + */ join->cur_sj_inner_tables= 0; if (straight_join) @@ -8023,8 +8027,8 @@ optimize_straight_join(JOIN *join, table_map join_tables) read_time= COST_ADD(read_time, COST_ADD(join->positions[idx].read_time, record_count / (double) TIME_FOR_COMPARE)); - advance_sj_state(join, join_tables, idx, &record_count, &read_time, - &loose_scan_pos); + optimize_semi_joins(join, join_tables, idx, &record_count, &read_time, + &loose_scan_pos); join_tables&= ~(s->table->map); double pushdown_cond_selectivity= 1.0; @@ -8201,6 +8205,12 @@ greedy_search(JOIN *join, /* This has been already checked by best_extension_by_limited_search */ DBUG_ASSERT(!is_interleave_error); + /* + Also, update the semi-join optimization state. 
Information about the + picked semi-join operation is in best_pos->...picker, but we need to + update the global state in the JOIN object, too. + */ + update_sj_state(join, best_table, idx, remaining_tables); /* find the position of 'best_table' in 'join->best_ref' */ best_idx= idx; @@ -8983,8 +8993,8 @@ best_extension_by_limited_search(JOIN *join, current_record_count / (double) TIME_FOR_COMPARE)); - advance_sj_state(join, remaining_tables, idx, &current_record_count, - &current_read_time, &loose_scan_pos); + optimize_semi_joins(join, remaining_tables, idx, &current_record_count, + &current_read_time, &loose_scan_pos); /* Expand only partial plans with lower cost than the best QEP so far */ if (current_read_time >= join->best_read) diff --git a/sql/sql_select.h b/sql/sql_select.h index b37c62a4e51..4591aa7e143 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1259,9 +1259,15 @@ public: Bitmap of inner tables of semi-join nests that have a proper subset of their tables in the current join prefix. That is, of those semi-join nests that have their tables both in and outside of the join prefix. 
+ (Note: tables that are constants but have not been pulled out of semi-join + nests are not considered part of semi-join nests) */ table_map cur_sj_inner_tables; - + +#ifndef DBUG_OFF + void dbug_verify_sj_inner_tables(uint n_positions) const; +#endif + /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ -- cgit v1.2.1 From 7c4efab9031c2b08af5caf0e3a2b01a70a3414bf Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 19 May 2022 13:58:31 +0200 Subject: typo fixed: [[space]] -> [[:space:]] --- scripts/wsrep_sst_rsync.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/wsrep_sst_rsync.sh b/scripts/wsrep_sst_rsync.sh index 1775281e634..994347d6f73 100644 --- a/scripts/wsrep_sst_rsync.sh +++ b/scripts/wsrep_sst_rsync.sh @@ -842,7 +842,7 @@ EOF fi if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then - if grep -m1 -qE "^$BYPASS_TAG([[space]]+.*)?\$" -- "$MAGIC_FILE"; then + if grep -m1 -qE "^$BYPASS_TAG([[:space:]]+.*)?\$" -- "$MAGIC_FILE"; then readonly WSREP_SST_OPT_BYPASS=1 readonly WSREP_TRANSFER_TYPE='IST' fi -- cgit v1.2.1 From 37ea0778733a21ae603788853ac4fb9a3be9915a Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Tue, 7 Jun 2022 15:49:41 +0200 Subject: main.help: flush help tables after modifying them otherwise following tests that crash the server will see them corrupted --- mysql-test/main/help.result | 10 +++++++--- mysql-test/main/help.test | 17 +++++++---------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/mysql-test/main/help.result b/mysql-test/main/help.result index 66850e75dc1..a3a236ee89e 100644 --- a/mysql-test/main/help.result +++ b/mysql-test/main/help.result @@ -274,12 +274,16 @@ delete from mysql.help_relation where help_keyword_id=@keyword1_id and help_topi delete from mysql.help_relation where help_keyword_id=@keyword2_id and help_topic_id=@topic1_id; delete from mysql.help_relation where help_keyword_id=@keyword3_id and 
help_topic_id=@topic3_id; delete from mysql.help_relation where help_keyword_id=@keyword3_id and help_topic_id=@topic4_id; -End of 4.1 tests. -DROP TABLE IF EXISTS t1; +flush tables; +# +# End of 4.1 tests. +# CREATE TABLE t1 (i INT); LOCK TABLES t1 WRITE; HELP no_such_topic; name is_it_category UNLOCK TABLES; DROP TABLE t1; -End of 5.1 tests. +# +# End of 5.1 tests. +# diff --git a/mysql-test/main/help.test b/mysql-test/main/help.test index 802f24f80a9..4f1ec5cef70 100644 --- a/mysql-test/main/help.test +++ b/mysql-test/main/help.test @@ -122,25 +122,22 @@ delete from mysql.help_relation where help_keyword_id=@keyword2_id and help_topi delete from mysql.help_relation where help_keyword_id=@keyword3_id and help_topic_id=@topic3_id; delete from mysql.help_relation where help_keyword_id=@keyword3_id and help_topic_id=@topic4_id; ---echo End of 4.1 tests. +flush tables; +--echo # +--echo # End of 4.1 tests. +--echo # # # Test that we can use HELP even under LOCK TABLES. See bug#9953: # CONVERT_TZ requires mysql.time_zone_name to be locked. # ---disable_warnings -DROP TABLE IF EXISTS t1; ---enable_warnings CREATE TABLE t1 (i INT); - LOCK TABLES t1 WRITE; - HELP no_such_topic; - UNLOCK TABLES; - DROP TABLE t1; - ---echo End of 5.1 tests. +--echo # +--echo # End of 5.1 tests. +--echo # -- cgit v1.2.1 From e240e8d0629e1a7e9f2c50d5eac81de590a766c4 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 2 Jun 2022 19:02:01 +0300 Subject: removed some compiler warnings --- sql/partition_info.cc | 2 +- sql/slave.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/partition_info.cc b/sql/partition_info.cc index d45c8592bfe..ecec944c444 100644 --- a/sql/partition_info.cc +++ b/sql/partition_info.cc @@ -890,7 +890,7 @@ void partition_info::vers_check_limit(THD *thd) #ifndef DBUG_OFF const uint32 sub_factor= num_subparts ? 
num_subparts : 1; uint32 part_id= vers_info->hist_part->id * sub_factor; - const uint32 part_id_end= part_id + sub_factor; + const uint32 part_id_end __attribute__((unused)) = part_id + sub_factor; DBUG_ASSERT(part_id_end <= num_parts * sub_factor); #endif diff --git a/sql/slave.cc b/sql/slave.cc index 7f7f121db7e..8818029d444 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -6718,7 +6718,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) can be satisfied only with the strict mode that ensures against "genuine" gtid duplicates. */ - rpl_gtid *gtid_in_slave_state= + rpl_gtid *gtid_in_slave_state __attribute__((unused))= mi->gtid_current_pos.find(mi->last_queued_gtid.domain_id); // Slave gtid state must not have updated yet to the last received gtid. -- cgit v1.2.1 From 46c4fd45c3a4cb49ae10883785e5172f5edd23cd Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 2 Jun 2022 14:15:35 +0300 Subject: Fixed cost calculation for SELECT STRAIGHT_JOIN Main fix was replacing 'read_time+=' with 'read_time='. I also updated the 'identical' code in optimize_straight_join() and best_extension_by_limited_search() to make them easier to compare. 
Reviewer: Sergei Petrunia --- sql/sql_select.cc | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index fa33f0e0500..501c2cd634d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8942,7 +8942,7 @@ determine_search_depth(JOIN *join) */ static void -optimize_straight_join(JOIN *join, table_map join_tables) +optimize_straight_join(JOIN *join, table_map remaining_tables) { JOIN_TAB *s; uint idx= join->const_tables; @@ -8960,30 +8960,32 @@ optimize_straight_join(JOIN *join, table_map join_tables) Json_writer_object trace_one_table(thd); if (unlikely(thd->trace_started())) { - trace_plan_prefix(join, idx, join_tables); + trace_plan_prefix(join, idx, remaining_tables); trace_one_table.add_table_name(s); } /* Find the best access method from 's' to the current partial plan */ - best_access_path(join, s, join_tables, join->positions, idx, + best_access_path(join, s, remaining_tables, join->positions, idx, disable_jbuf, record_count, position, &loose_scan_pos); - /* compute the cost of the new plan extended with 's' */ + /* Compute the cost of the new plan extended with 's' */ record_count= COST_MULT(record_count, position->records_read); const double filter_cmp_gain= position->range_rowid_filter_info ? 
position->range_rowid_filter_info->get_cmp_gain(record_count) : 0; - read_time+= COST_ADD(read_time - filter_cmp_gain, - COST_ADD(position->read_time, - record_count / TIME_FOR_COMPARE)); - advance_sj_state(join, join_tables, idx, &record_count, &read_time, + read_time= COST_ADD(read_time, + COST_ADD(position->read_time - + filter_cmp_gain, + record_count / + TIME_FOR_COMPARE)); + advance_sj_state(join, remaining_tables, idx, &record_count, &read_time, &loose_scan_pos); - join_tables&= ~(s->table->map); + remaining_tables&= ~(s->table->map); double pushdown_cond_selectivity= 1.0; if (use_cond_selectivity > 1) pushdown_cond_selectivity= table_cond_selectivity(join, idx, s, - join_tables); + remaining_tables); position->cond_selectivity= pushdown_cond_selectivity; ++idx; } @@ -9965,16 +9967,16 @@ best_extension_by_limited_search(JOIN *join, best_access_path(join, s, remaining_tables, join->positions, idx, disable_jbuf, record_count, position, &loose_scan_pos); - /* Compute the cost of extending the plan with 's' */ + /* Compute the cost of the new plan extended with 's' */ current_record_count= COST_MULT(record_count, position->records_read); const double filter_cmp_gain= position->range_rowid_filter_info ? position->range_rowid_filter_info->get_cmp_gain(current_record_count) : 0; - current_read_time=COST_ADD(read_time, - COST_ADD(position->read_time - - filter_cmp_gain, - current_record_count / - TIME_FOR_COMPARE)); + current_read_time= COST_ADD(read_time, + COST_ADD(position->read_time - + filter_cmp_gain, + current_record_count / + TIME_FOR_COMPARE)); if (unlikely(thd->trace_started())) { -- cgit v1.2.1 From f0ea7f7f3324a54e08431d5935fab1116db33818 Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Mon, 6 Jun 2022 22:21:22 +0300 Subject: MDEV-28749: restore_prev_nj_state() doesn't update cur_sj_inner_tables correctly (Try 2) The code that updates semi-join optimization state for a join order prefix had several bugs. 
The visible effect was bad optimization for FirstMatch or LooseScan strategies: they either weren't considered when they should have been, or considered when they shouldn't have been. In order to hit the bug, the optimizer needs to consider several different join prefixes in a certain order. Queries with "obvious" query plans which prune all join orders except one are not affected. Internally, the bugs in updates of semi-join state were: 1. restore_prev_sj_state() relied on its documented assumption, "we assume remaining_tables doesnt contain @tab", which wasn't true. 2. Another bug in this function: it did remove bits from join->cur_sj_inner_tables but never added them. 3. greedy_search() adds tables into the join prefix but neglects to update the semi-join optimization state. (It does update nested outer join state, see this call: check_interleaving_with_nj(best_table) but there's no matching call to update the semi-join state.) (This wasn't visible because most of the state is in the POSITION structure which is updated. But there is also state in JOIN, too) The patch: - Fixes all of the above - Adds JOIN::dbug_verify_sj_inner_tables() which is used to verify the state is correct at every step. - Renames advance_sj_state() to optimize_semi_joins(). - Introduces update_sj_state() which ideally should have been called "advance_sj_state" but I didn't reuse the name to avoid creating confusion. 
--- mysql-test/main/subselect_sj.result | 6 +- mysql-test/main/subselect_sj_jcl6.result | 8 +-- sql/opt_subselect.cc | 103 +++++++++++++++++++++++++------ sql/opt_subselect.h | 8 ++- sql/sql_select.cc | 18 ++++-- sql/sql_select.h | 8 ++- 6 files changed, 118 insertions(+), 33 deletions(-) diff --git a/mysql-test/main/subselect_sj.result b/mysql-test/main/subselect_sj.result index 9fd8186b66c..e9a484bbcbf 100644 --- a/mysql-test/main/subselect_sj.result +++ b/mysql-test/main/subselect_sj.result @@ -2178,10 +2178,10 @@ INSERT INTO t5 VALUES (7,0),(9,0); explain SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary +1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan 1 PRIMARY t4 ALL NULL NULL NULL NULL 3 -1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where -1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (flat, BNL join) +1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5) +1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join) SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); a 0 diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result index e97c1d5e915..c485b5e5f39 100644 --- a/mysql-test/main/subselect_sj_jcl6.result +++ b/mysql-test/main/subselect_sj_jcl6.result @@ -2189,10 +2189,10 @@ INSERT INTO t5 VALUES (7,0),(9,0); explain SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY t5 index a a 10 NULL 2 Using index; Start temporary -1 PRIMARY t4 ALL NULL NULL NULL NULL 3 Using join buffer (flat, BNL join) -1 PRIMARY t2 ALL b NULL NULL NULL 10 Using where; Using join buffer (incremental, BNL join) 
-1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; End temporary; Using join buffer (incremental, BNL join) +1 PRIMARY t5 index a a 10 NULL 2 Using where; Using index; LooseScan +1 PRIMARY t4 ALL NULL NULL NULL NULL 3 +1 PRIMARY t2 ref b b 5 test.t5.b 2 Using where; FirstMatch(t5) +1 PRIMARY t3 ALL NULL NULL NULL NULL 15 Using where; Using join buffer (flat, BNL join) SELECT * FROM t3 WHERE t3.a IN (SELECT t5.a FROM t2, t4, t5 WHERE t2.c = t5.a AND t2.b = t5.b); a 0 diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 88b1428f2b1..3a1ebd9ec57 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -180,7 +180,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - optimize_semijoin_nests() does pre-optimization - during join optimization, the join has one JOIN_TAB (or is it POSITION?) - array, and suffix-based detection is used, see advance_sj_state() + array, and suffix-based detection is used, see optimize_semi_joins() - after join optimization is done, get_best_combination() switches the data-structure to prefix-based, multiple JOIN_TAB ranges format. 
@@ -2761,7 +2761,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) Do semi-join optimization step after we've added a new tab to join prefix SYNOPSIS - advance_sj_state() + optimize_semi_joins() join The join we're optimizing remaining_tables Tables not in the join prefix new_join_tab Join tab we've just added to the join prefix @@ -2821,9 +2821,9 @@ bool is_multiple_semi_joins(JOIN *join, POSITION *prefix, uint idx, table_map in } -void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos) +void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, + double *current_read_time, POSITION *loose_scan_pos) { POSITION *pos= join->positions + idx; const JOIN_TAB *new_join_tab= pos->table; @@ -3014,19 +3014,36 @@ void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, } } - if ((emb_sj_nest= new_join_tab->emb_sj_nest)) + update_sj_state(join, new_join_tab, idx, remaining_tables); + + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; + pos->dups_producing_tables= dups_producing_tables; +} + + +/* + Update JOIN's semi-join optimization state after the join tab new_tab + has been added into the join prefix. 
+ + @seealso restore_prev_sj_state() does the reverse action +*/ + +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables) +{ + if (TABLE_LIST *emb_sj_nest= new_tab->emb_sj_nest) { join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ if (!(remaining_tables & - emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) + emb_sj_nest->sj_inner_tables & ~new_tab->table->map)) join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } - - pos->prefix_cost.convert_from_cost(*current_read_time); - pos->prefix_record_count= *current_record_count; - pos->dups_producing_tables= dups_producing_tables; +#ifndef DBUG_OFF + join->dbug_verify_sj_inner_tables(idx + 1); +#endif } @@ -3579,10 +3596,45 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, return FALSE; } +#ifndef DBUG_OFF +/* + Verify the value of JOIN::cur_sj_inner_tables by recomputing it +*/ +void JOIN::dbug_verify_sj_inner_tables(uint prefix_size) const +{ + table_map cur_map= const_table_map; + table_map nests_entered= 0; + if (emb_sjm_nest) + { + DBUG_ASSERT(cur_sj_inner_tables == 0); + return; + } + + for (uint i= const_tables; i < prefix_size; i++) + { + JOIN_TAB *tab= positions[i].table; + cur_map |= tab->table->map; + if (TABLE_LIST *sj_nest= tab->emb_sj_nest) + { + nests_entered |= sj_nest->sj_inner_tables; + if (!(sj_nest->sj_inner_tables & ~cur_map)) + { + // all nest tables are in the prefix already + nests_entered &= ~sj_nest->sj_inner_tables; + } + } + } + DBUG_ASSERT(nests_entered == cur_sj_inner_tables); +} +#endif /* Remove the last join tab from from join->cur_sj_inner_tables bitmap - we assume remaining_tables doesnt contain @tab. + + @note + remaining_tables contains @tab. 
+ + @seealso update_sj_state() does the reverse */ void restore_prev_sj_state(const table_map remaining_tables, @@ -3596,15 +3648,30 @@ void restore_prev_sj_state(const table_map remaining_tables, tab->join->sjm_lookup_tables &= ~subq_tables; } - if ((emb_sj_nest= tab->emb_sj_nest)) + if (!tab->join->emb_sjm_nest && (emb_sj_nest= tab->emb_sj_nest)) { + table_map subq_tables= emb_sj_nest->sj_inner_tables & + ~tab->join->const_table_map; /* If we're removing the last SJ-inner table, remove the sj-nest */ - if ((remaining_tables & emb_sj_nest->sj_inner_tables) == - (emb_sj_nest->sj_inner_tables & ~tab->table->map)) + if ((remaining_tables & subq_tables) == subq_tables) { + // All non-const tables of the SJ nest are in the remaining_tables. + // we are not in the nest anymore. tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; } + else + { + // Semi-join nest has: + // - a table being removed (not in the prefix) + // - some tables in the prefix. + tab->join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + } } + +#ifndef DBUG_OFF + /* positions[idx] has been removed. Verify the state for [0...idx-1] */ + tab->join->dbug_verify_sj_inner_tables(idx); +#endif } @@ -3831,8 +3898,8 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join) join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; join->best_positions[first].n_sj_tables= sjm->tables; /* - Do what advance_sj_state did: re-run best_access_path for every table - in the [last_inner_table + 1; pos..) range + Do what optimize_semi_joins did: re-run best_access_path for every + table in the [last_inner_table + 1; pos..) range */ double prefix_rec_count; /* Get the prefix record count */ @@ -5086,7 +5153,7 @@ int setup_semijoin_loosescan(JOIN *join) The choice between the strategies is made by the join optimizer (see - advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()). + optimize_semi_joins() and fix_semijoin_strategies_for_picked_join_order()). 
This function sets up all fields/structures/etc needed for execution except for setup/initialization of semi-join materialization which is done in setup_sj_materialization() (todo: can't we move that to here also?) diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index abd37f1e98e..7b1b810ee81 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -314,9 +314,11 @@ public: }; -void advance_sj_state(JOIN *join, table_map remaining_tables, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos); +void optimize_semi_joins(JOIN *join, table_map remaining_tables, uint idx, + double *current_record_count, + double *current_read_time, POSITION *loose_scan_pos); +void update_sj_state(JOIN *join, const JOIN_TAB *new_tab, + uint idx, table_map remaining_tables); void restore_prev_sj_state(const table_map remaining_tables, const JOIN_TAB *tab, uint idx); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 501c2cd634d..a059986d611 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -8663,6 +8663,10 @@ choose_plan(JOIN *join, table_map join_tables) { choose_initial_table_order(join); } + /* + Note: constant tables are already in the join prefix. We don't + put them into the cur_sj_inner_tables, though. + */ join->cur_sj_inner_tables= 0; if (straight_join) @@ -8978,8 +8982,8 @@ optimize_straight_join(JOIN *join, table_map remaining_tables) filter_cmp_gain, record_count / TIME_FOR_COMPARE)); - advance_sj_state(join, remaining_tables, idx, &record_count, &read_time, - &loose_scan_pos); + optimize_semi_joins(join, remaining_tables, idx, &record_count, &read_time, + &loose_scan_pos); remaining_tables&= ~(s->table->map); double pushdown_cond_selectivity= 1.0; @@ -9159,6 +9163,12 @@ greedy_search(JOIN *join, /* This has been already checked by best_extension_by_limited_search */ DBUG_ASSERT(!is_interleave_error); + /* + Also, update the semi-join optimization state. 
Information about the + picked semi-join operation is in best_pos->...picker, but we need to + update the global state in the JOIN object, too. + */ + update_sj_state(join, best_table, idx, remaining_tables); /* find the position of 'best_table' in 'join->best_ref' */ best_idx= idx; @@ -9983,8 +9993,8 @@ best_extension_by_limited_search(JOIN *join, trace_one_table.add("rows_for_plan", current_record_count); trace_one_table.add("cost_for_plan", current_read_time); } - advance_sj_state(join, remaining_tables, idx, &current_record_count, - &current_read_time, &loose_scan_pos); + optimize_semi_joins(join, remaining_tables, idx, &current_record_count, + &current_read_time, &loose_scan_pos); /* Expand only partial plans with lower cost than the best QEP so far */ if (current_read_time >= join->best_read) diff --git a/sql/sql_select.h b/sql/sql_select.h index 4a2929207a5..e65267558e1 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -1307,9 +1307,15 @@ public: Bitmap of inner tables of semi-join nests that have a proper subset of their tables in the current join prefix. That is, of those semi-join nests that have their tables both in and outside of the join prefix. + (Note: tables that are constants but have not been pulled out of semi-join + nests are not considered part of semi-join nests) */ table_map cur_sj_inner_tables; - + +#ifndef DBUG_OFF + void dbug_verify_sj_inner_tables(uint n_positions) const; +#endif + /* We also maintain a stack of join optimization states in * join->positions[] */ /******* Join optimization state members end *******/ -- cgit v1.2.1 From 64f24b776dfdb8bcc37cc9d5be022a8af28f76b0 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sun, 15 May 2022 15:46:29 +0300 Subject: greedy_search() and best_extension_by_limited_search() scrambled table order best_extension_by_limited_search() assumes that tables should be sorted according to size to be able to quickly disregard bad plans. 
However the current usage of swap_variables() will change the table order to an unsorted one for the next recursive call. This breaks the assumption and causes performance issues when using many tables (we have to examine many more plans). This patch fixes this by ensuring that the original table order is kept for the not yet used tables when best_extension_by_limited_search() is called. This was done by always calling swap_variables() for each table and restoring the original table order at exit. Some tests changed: - In a majority of the tests the change was that two "identical tables" were swapped and the optimizer is now using the first/smaller table - In a few tests the table order was changed. The new plan looks identical or slightly better than the original. --- mysql-test/main/derived_cond_pushdown.result | 36 +++---- mysql-test/main/join_nested.result | 4 +- mysql-test/main/join_nested_jcl6.result | 6 +- mysql-test/main/opt_trace.result | 140 +++++++++++++-------------- mysql-test/main/selectivity.result | 4 +- mysql-test/main/subselect3.result | 4 +- mysql-test/main/subselect3_jcl6.result | 4 +- mysql-test/main/subselect_sj.result | 4 +- mysql-test/main/subselect_sj2.result | 4 +- mysql-test/main/subselect_sj2_jcl6.result | 8 +- mysql-test/main/subselect_sj2_mat.result | 16 +-- mysql-test/main/subselect_sj_jcl6.result | 4 +- sql/sql_select.cc | 41 ++++++-- 13 files changed, 146 insertions(+), 129 deletions(-) diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result index 275459bd868..d15f368e08d 100644 --- a/mysql-test/main/derived_cond_pushdown.result +++ b/mysql-test/main/derived_cond_pushdown.result @@ -3454,10 +3454,10 @@ a b max_c avg_c a b max_c avg_c a b min_c a b c d 6 20 315 279.3333 6 20 315 279.3333 6 20 214 6 20 315 279 6 20 315 279.3333 6 20 315 279.3333 8 33 114 8 80 800 314 6 20 315 279.3333 6 20 315 279.3333 6 20 214 6 23 303 909 -6 20 315 279.3333 8 33 404 213.6667 6 20 214 6 20 315 279 -6 20 315 
279.3333 8 33 404 213.6667 6 20 214 6 23 303 909 8 33 404 213.6667 6 20 315 279.3333 8 33 114 8 64 248 107 8 33 404 213.6667 6 20 315 279.3333 8 33 114 8 80 800 314 +6 20 315 279.3333 8 33 404 213.6667 6 20 214 6 20 315 279 +6 20 315 279.3333 8 33 404 213.6667 6 20 214 6 23 303 909 8 33 404 213.6667 8 33 404 213.6667 7 11 708 7 13 312 406 8 33 404 213.6667 8 33 404 213.6667 8 33 114 8 64 248 107 8 33 404 213.6667 8 33 404 213.6667 6 20 214 6 20 315 279 @@ -3472,10 +3472,10 @@ a b max_c avg_c a b max_c avg_c a b min_c a b c d 6 20 315 279.3333 6 20 315 279.3333 6 20 214 6 20 315 279 6 20 315 279.3333 6 20 315 279.3333 8 33 114 8 80 800 314 6 20 315 279.3333 6 20 315 279.3333 6 20 214 6 23 303 909 -6 20 315 279.3333 8 33 404 213.6667 6 20 214 6 20 315 279 -6 20 315 279.3333 8 33 404 213.6667 6 20 214 6 23 303 909 8 33 404 213.6667 6 20 315 279.3333 8 33 114 8 64 248 107 8 33 404 213.6667 6 20 315 279.3333 8 33 114 8 80 800 314 +6 20 315 279.3333 8 33 404 213.6667 6 20 214 6 20 315 279 +6 20 315 279.3333 8 33 404 213.6667 6 20 214 6 23 303 909 8 33 404 213.6667 8 33 404 213.6667 7 11 708 7 13 312 406 8 33 404 213.6667 8 33 404 213.6667 8 33 114 8 64 248 107 8 33 404 213.6667 8 33 404 213.6667 6 20 214 6 20 315 279 @@ -3487,8 +3487,8 @@ and (v1.max_c<500) and (v3.a=t2.a) and (v2.max_c>300); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where 1 PRIMARY ref key0 key0 5 test.t2.a 2 Using where -1 PRIMARY ALL NULL NULL NULL NULL 20 Using where; Using join buffer (flat, BNL join) -1 PRIMARY ALL NULL NULL NULL NULL 20 Using where; Using join buffer (incremental, BNL join) +1 PRIMARY ALL NULL NULL NULL NULL 20 Using where; Using join buffer (flat, BNL join) +1 PRIMARY ALL NULL NULL NULL NULL 20 Using where; Using join buffer (incremental, BNL join) 4 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 3 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using 
filesort 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using temporary; Using filesort @@ -3538,19 +3538,19 @@ EXPLAIN }, "block-nl-join": { "table": { - "table_name": "", + "table_name": "", "access_type": "ALL", "rows": 20, "filtered": 100, - "attached_condition": "v2.max_c > 300" + "attached_condition": "v1.max_c < 500" }, "buffer_type": "flat", "buffer_size": "715", "join_type": "BNL", "materialized": { "query_block": { - "select_id": 3, - "having_condition": "max_c < 707 and max_c > 300", + "select_id": 2, + "having_condition": "max_c < 707 and max_c < 500", "filesort": { "sort_key": "t1.a, t1.b", "temporary_table": { @@ -3558,8 +3558,7 @@ EXPLAIN "table_name": "t1", "access_type": "ALL", "rows": 20, - "filtered": 100, - "attached_condition": "t1.a > 5" + "filtered": 100 } } } @@ -3568,20 +3567,20 @@ EXPLAIN }, "block-nl-join": { "table": { - "table_name": "", + "table_name": "", "access_type": "ALL", "rows": 20, "filtered": 100, - "attached_condition": "v1.max_c < 500" + "attached_condition": "v2.max_c > 300" }, "buffer_type": "incremental", "buffer_size": "9Kb", "join_type": "BNL", - "attached_condition": "v1.a = v2.a or v1.a = t2.a", + "attached_condition": "v2.a = v1.a or v1.a = t2.a", "materialized": { "query_block": { - "select_id": 2, - "having_condition": "max_c < 707 and max_c < 500", + "select_id": 3, + "having_condition": "max_c < 707 and max_c > 300", "filesort": { "sort_key": "t1.a, t1.b", "temporary_table": { @@ -3589,7 +3588,8 @@ EXPLAIN "table_name": "t1", "access_type": "ALL", "rows": 20, - "filtered": 100 + "filtered": 100, + "attached_condition": "t1.a > 5" } } } diff --git a/mysql-test/main/join_nested.result b/mysql-test/main/join_nested.result index b323190d8d5..0c7a1b48940 100644 --- a/mysql-test/main/join_nested.result +++ b/mysql-test/main/join_nested.result @@ -916,11 +916,11 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t6 ALL NULL NULL NULL NULL 3 100.00 Using where 1 SIMPLE t8 ALL NULL NULL NULL 
NULL 2 100.00 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 Using where -1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where 1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where +1 SIMPLE t4 ALL NULL NULL NULL NULL 2 100.00 Using where 1 SIMPLE t9 ALL NULL NULL NULL NULL 3 100.00 Using where; Using join buffer (flat, BNL join) Warnings: -Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t0`.`b` AS `b`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b`,`test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t5`.`a` AS `a`,`test`.`t5`.`b` AS `b`,`test`.`t6`.`a` AS `a`,`test`.`t6`.`b` AS `b`,`test`.`t7`.`a` AS `a`,`test`.`t7`.`b` AS `b`,`test`.`t8`.`a` AS `a`,`test`.`t8`.`b` AS `b`,`test`.`t9`.`a` AS `a`,`test`.`t9`.`b` AS `b` from `test`.`t0` join `test`.`t1` left join (`test`.`t2` left join (`test`.`t3` join `test`.`t4`) on(`test`.`t3`.`a` = 1 and `test`.`t4`.`b` = `test`.`t2`.`b` and `test`.`t2`.`a` > 0) join `test`.`t5` left join (`test`.`t6` join `test`.`t7` left join `test`.`t8` on(`test`.`t8`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` < 10)) on(`test`.`t7`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` >= 2)) on((`test`.`t3`.`b` = 2 or `test`.`t3`.`c` is null) and (`test`.`t6`.`b` = 2 or `test`.`t6`.`c` is null) and (`test`.`t5`.`b` = `test`.`t0`.`b` or `test`.`t3`.`c` is null or `test`.`t6`.`c` is null or `test`.`t8`.`c` is null) and `test`.`t1`.`a` <> 2) join `test`.`t9` where `test`.`t0`.`a` = 1 and `test`.`t1`.`b` = `test`.`t0`.`b` and `test`.`t9`.`a` = 1 and (`test`.`t2`.`a` >= 4 or `test`.`t2`.`c` is null) and (`test`.`t3`.`a` < 5 or `test`.`t3`.`c` is null) and (`test`.`t3`.`b` = `test`.`t4`.`b` or `test`.`t3`.`c` is null or `test`.`t4`.`c` is null) and (`test`.`t5`.`a` >= 2 or `test`.`t5`.`c` is null) and (`test`.`t6`.`a` >= 4 or `test`.`t6`.`c` is null) and (`test`.`t7`.`a` <= 2 or `test`.`t7`.`c` is null) and (`test`.`t8`.`a` < 1 or `test`.`t8`.`c` is 
null) and (`test`.`t9`.`b` = `test`.`t8`.`b` or `test`.`t8`.`c` is null) +Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t0`.`b` AS `b`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b`,`test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t5`.`a` AS `a`,`test`.`t5`.`b` AS `b`,`test`.`t6`.`a` AS `a`,`test`.`t6`.`b` AS `b`,`test`.`t7`.`a` AS `a`,`test`.`t7`.`b` AS `b`,`test`.`t8`.`a` AS `a`,`test`.`t8`.`b` AS `b`,`test`.`t9`.`a` AS `a`,`test`.`t9`.`b` AS `b` from `test`.`t0` join `test`.`t1` left join (`test`.`t2` left join (`test`.`t3` join `test`.`t4`) on(`test`.`t3`.`a` = 1 and `test`.`t4`.`b` = `test`.`t2`.`b` and `test`.`t2`.`a` > 0) join `test`.`t5` left join (`test`.`t6` join `test`.`t7` left join `test`.`t8` on(`test`.`t8`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` < 10)) on(`test`.`t7`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` >= 2)) on((`test`.`t3`.`b` = 2 or `test`.`t3`.`c` is null) and (`test`.`t6`.`b` = 2 or `test`.`t6`.`c` is null) and (`test`.`t5`.`b` = `test`.`t0`.`b` or `test`.`t3`.`c` is null or `test`.`t6`.`c` is null or `test`.`t8`.`c` is null) and `test`.`t1`.`a` <> 2) join `test`.`t9` where `test`.`t0`.`a` = 1 and `test`.`t1`.`b` = `test`.`t0`.`b` and `test`.`t9`.`a` = 1 and (`test`.`t2`.`a` >= 4 or `test`.`t2`.`c` is null) and (`test`.`t3`.`a` < 5 or `test`.`t3`.`c` is null) and (`test`.`t4`.`b` = `test`.`t3`.`b` or `test`.`t3`.`c` is null or `test`.`t4`.`c` is null) and (`test`.`t5`.`a` >= 2 or `test`.`t5`.`c` is null) and (`test`.`t6`.`a` >= 4 or `test`.`t6`.`c` is null) and (`test`.`t7`.`a` <= 2 or `test`.`t7`.`c` is null) and (`test`.`t8`.`a` < 1 or `test`.`t8`.`c` is null) and (`test`.`t9`.`b` = `test`.`t8`.`b` or `test`.`t8`.`c` is null) INSERT INTO t4 VALUES (-3,12,0), (-4,13,0), (-1,11,0), (-3,11,0), (-5,15,0); INSERT INTO t5 VALUES (-3,11,0), (-2,12,0), (-3,13,0), (-4,12,0); CREATE INDEX idx_b ON t4(b); diff --git 
a/mysql-test/main/join_nested_jcl6.result b/mysql-test/main/join_nested_jcl6.result index f7d0242244d..31f5c794071 100644 --- a/mysql-test/main/join_nested_jcl6.result +++ b/mysql-test/main/join_nested_jcl6.result @@ -925,11 +925,11 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t6 ALL NULL NULL NULL NULL 3 100.00 Using where; Using join buffer (incremental, BNL join) 1 SIMPLE t8 hash_ALL NULL #hash#$hj 5 test.t5.b 2 100.00 Using where; Using join buffer (incremental, BNLH join) 1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 Using where; Using join buffer (incremental, BNL join) -1 SIMPLE t4 hash_ALL NULL #hash#$hj 5 test.t2.b 2 100.00 Using where; Using join buffer (incremental, BNLH join) 1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (incremental, BNL join) +1 SIMPLE t4 hash_ALL NULL #hash#$hj 5 test.t2.b 2 100.00 Using where; Using join buffer (incremental, BNLH join) 1 SIMPLE t9 ALL NULL NULL NULL NULL 3 100.00 Using where; Using join buffer (incremental, BNL join) Warnings: -Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t0`.`b` AS `b`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b`,`test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t5`.`a` AS `a`,`test`.`t5`.`b` AS `b`,`test`.`t6`.`a` AS `a`,`test`.`t6`.`b` AS `b`,`test`.`t7`.`a` AS `a`,`test`.`t7`.`b` AS `b`,`test`.`t8`.`a` AS `a`,`test`.`t8`.`b` AS `b`,`test`.`t9`.`a` AS `a`,`test`.`t9`.`b` AS `b` from `test`.`t0` join `test`.`t1` left join (`test`.`t2` left join (`test`.`t3` join `test`.`t4`) on(`test`.`t3`.`a` = 1 and `test`.`t4`.`b` = `test`.`t2`.`b` and `test`.`t2`.`a` > 0 and `test`.`t2`.`b` is not null) join `test`.`t5` left join (`test`.`t6` join `test`.`t7` left join `test`.`t8` on(`test`.`t8`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` < 10 and `test`.`t5`.`b` is not null)) on(`test`.`t7`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` >= 2 and 
`test`.`t5`.`b` is not null)) on((`test`.`t3`.`b` = 2 or `test`.`t3`.`c` is null) and (`test`.`t6`.`b` = 2 or `test`.`t6`.`c` is null) and (`test`.`t5`.`b` = `test`.`t0`.`b` or `test`.`t3`.`c` is null or `test`.`t6`.`c` is null or `test`.`t8`.`c` is null) and `test`.`t1`.`a` <> 2) join `test`.`t9` where `test`.`t0`.`a` = 1 and `test`.`t1`.`b` = `test`.`t0`.`b` and `test`.`t9`.`a` = 1 and (`test`.`t2`.`a` >= 4 or `test`.`t2`.`c` is null) and (`test`.`t3`.`a` < 5 or `test`.`t3`.`c` is null) and (`test`.`t3`.`b` = `test`.`t4`.`b` or `test`.`t3`.`c` is null or `test`.`t4`.`c` is null) and (`test`.`t5`.`a` >= 2 or `test`.`t5`.`c` is null) and (`test`.`t6`.`a` >= 4 or `test`.`t6`.`c` is null) and (`test`.`t7`.`a` <= 2 or `test`.`t7`.`c` is null) and (`test`.`t8`.`a` < 1 or `test`.`t8`.`c` is null) and (`test`.`t9`.`b` = `test`.`t8`.`b` or `test`.`t8`.`c` is null) +Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t0`.`b` AS `b`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b`,`test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t5`.`a` AS `a`,`test`.`t5`.`b` AS `b`,`test`.`t6`.`a` AS `a`,`test`.`t6`.`b` AS `b`,`test`.`t7`.`a` AS `a`,`test`.`t7`.`b` AS `b`,`test`.`t8`.`a` AS `a`,`test`.`t8`.`b` AS `b`,`test`.`t9`.`a` AS `a`,`test`.`t9`.`b` AS `b` from `test`.`t0` join `test`.`t1` left join (`test`.`t2` left join (`test`.`t3` join `test`.`t4`) on(`test`.`t3`.`a` = 1 and `test`.`t4`.`b` = `test`.`t2`.`b` and `test`.`t2`.`a` > 0 and `test`.`t2`.`b` is not null) join `test`.`t5` left join (`test`.`t6` join `test`.`t7` left join `test`.`t8` on(`test`.`t8`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` < 10 and `test`.`t5`.`b` is not null)) on(`test`.`t7`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` >= 2 and `test`.`t5`.`b` is not null)) on((`test`.`t3`.`b` = 2 or `test`.`t3`.`c` is null) and (`test`.`t6`.`b` = 2 or `test`.`t6`.`c` is null) and (`test`.`t5`.`b` = `test`.`t0`.`b` or `test`.`t3`.`c` 
is null or `test`.`t6`.`c` is null or `test`.`t8`.`c` is null) and `test`.`t1`.`a` <> 2) join `test`.`t9` where `test`.`t0`.`a` = 1 and `test`.`t1`.`b` = `test`.`t0`.`b` and `test`.`t9`.`a` = 1 and (`test`.`t2`.`a` >= 4 or `test`.`t2`.`c` is null) and (`test`.`t3`.`a` < 5 or `test`.`t3`.`c` is null) and (`test`.`t4`.`b` = `test`.`t3`.`b` or `test`.`t3`.`c` is null or `test`.`t4`.`c` is null) and (`test`.`t5`.`a` >= 2 or `test`.`t5`.`c` is null) and (`test`.`t6`.`a` >= 4 or `test`.`t6`.`c` is null) and (`test`.`t7`.`a` <= 2 or `test`.`t7`.`c` is null) and (`test`.`t8`.`a` < 1 or `test`.`t8`.`c` is null) and (`test`.`t9`.`b` = `test`.`t8`.`b` or `test`.`t8`.`c` is null) INSERT INTO t4 VALUES (-3,12,0), (-4,13,0), (-1,11,0), (-3,11,0), (-5,15,0); INSERT INTO t5 VALUES (-3,11,0), (-2,12,0), (-3,13,0), (-4,12,0); CREATE INDEX idx_b ON t4(b); @@ -1027,8 +1027,8 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t3 ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (incremental, BNL join) 1 SIMPLE t4 ref idx_b idx_b 5 test.t2.b 2 100.00 Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan 1 SIMPLE t5 ALL idx_b NULL NULL NULL 7 100.00 Using where; Using join buffer (incremental, BNL join) -1 SIMPLE t6 ALL NULL NULL NULL NULL 3 100.00 Using where; Using join buffer (incremental, BNL join) 1 SIMPLE t7 hash_ALL NULL #hash#$hj 5 test.t5.b 2 100.00 Using where; Using join buffer (incremental, BNLH join) +1 SIMPLE t6 ALL NULL NULL NULL NULL 3 100.00 Using where; Using join buffer (incremental, BNL join) 1 SIMPLE t8 ref idx_b idx_b 5 test.t5.b 2 100.00 Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan Warnings: Note 1003 select `test`.`t0`.`a` AS `a`,`test`.`t0`.`b` AS `b`,`test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b`,`test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS 
`b`,`test`.`t5`.`a` AS `a`,`test`.`t5`.`b` AS `b`,`test`.`t6`.`a` AS `a`,`test`.`t6`.`b` AS `b`,`test`.`t7`.`a` AS `a`,`test`.`t7`.`b` AS `b`,`test`.`t8`.`a` AS `a`,`test`.`t8`.`b` AS `b`,`test`.`t9`.`a` AS `a`,`test`.`t9`.`b` AS `b` from `test`.`t0` join `test`.`t1` left join (`test`.`t2` left join (`test`.`t3` join `test`.`t4`) on(`test`.`t3`.`a` = 1 and `test`.`t4`.`b` = `test`.`t2`.`b` and `test`.`t2`.`a` > 0 and `test`.`t4`.`a` > 0 and `test`.`t2`.`b` is not null) join `test`.`t5` left join (`test`.`t6` join `test`.`t7` left join `test`.`t8` on(`test`.`t8`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` < 10 and `test`.`t8`.`a` >= 0 and `test`.`t5`.`b` is not null)) on(`test`.`t7`.`b` = `test`.`t5`.`b` and `test`.`t6`.`b` >= 2 and `test`.`t5`.`a` > 0 and `test`.`t5`.`b` is not null)) on((`test`.`t3`.`b` = 2 or `test`.`t3`.`c` is null) and (`test`.`t6`.`b` = 2 or `test`.`t6`.`c` is null) and (`test`.`t5`.`b` = `test`.`t0`.`b` or `test`.`t3`.`c` is null or `test`.`t6`.`c` is null or `test`.`t8`.`c` is null) and `test`.`t1`.`a` <> 2) join `test`.`t9` where `test`.`t0`.`a` = 1 and `test`.`t1`.`b` = `test`.`t0`.`b` and `test`.`t9`.`a` = 1 and (`test`.`t2`.`a` >= 4 or `test`.`t2`.`c` is null) and (`test`.`t3`.`a` < 5 or `test`.`t3`.`c` is null) and (`test`.`t4`.`b` = `test`.`t3`.`b` or `test`.`t3`.`c` is null or `test`.`t4`.`c` is null) and (`test`.`t5`.`a` >= 2 or `test`.`t5`.`c` is null) and (`test`.`t6`.`a` >= 4 or `test`.`t6`.`c` is null) and (`test`.`t7`.`a` <= 2 or `test`.`t7`.`c` is null) and (`test`.`t8`.`a` < 1 or `test`.`t8`.`c` is null) and (`test`.`t8`.`b` = `test`.`t9`.`b` or `test`.`t8`.`c` is null) diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 5504f4da81e..6d026bf7502 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -5494,6 +5494,11 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "rows_for_plan": 729, "cost_for_plan": 451.8615234, 
"semijoin_strategy_choice": [ + { + "strategy": "FirstMatch", + "records": 27, + "read_time": 665.225293 + }, { "strategy": "DuplicateWeedout", "records": 27, @@ -5651,35 +5656,26 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_outer_2", "t_inner_3" ], - "table": "t_inner_4", + "table": "t_inner_2", "best_access_path": { "considered_access_paths": [ { "access_type": "scan", - "resulting_rows": 3, - "cost": 2.005126953, + "resulting_rows": 9, + "cost": 2.015380859, "chosen": true } ], "chosen_access_method": { "type": "scan", - "records": 3, - "cost": 2.005126953, + "records": 9, + "cost": 2.015380859, "uses_join_buffering": true } }, - "rows_for_plan": 2187, - "cost_for_plan": 611.8461426, - "semijoin_strategy_choice": [ - { - "strategy": "FirstMatch", - "records": 81, - "read_time": 2232.809033 - }, - { - "chosen_strategy": "FirstMatch" - } - ], + "rows_for_plan": 6561, + "cost_for_plan": 1486.656396, + "semijoin_strategy_choice": [], "pruned_by_cost": true }, { @@ -5689,25 +5685,25 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_outer_2", "t_inner_3" ], - "table": "t_inner_2", + "table": "t_inner_4", "best_access_path": { "considered_access_paths": [ { "access_type": "scan", - "resulting_rows": 9, - "cost": 2.015380859, + "resulting_rows": 3, + "cost": 2.005126953, "chosen": true } ], "chosen_access_method": { "type": "scan", - "records": 9, - "cost": 2.015380859, + "records": 3, + "cost": 2.005126953, "uses_join_buffering": true } }, - "rows_for_plan": 6561, - "cost_for_plan": 1486.656396, + "rows_for_plan": 2187, + "cost_for_plan": 611.8461426, "semijoin_strategy_choice": [], "pruned_by_cost": true } @@ -5740,7 +5736,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "rest_of_plan": [ { "plan_prefix": ["t_outer_1", "t_inner_1", "t_inner_4"], - "table": "t_outer_2", + "table": "t_inner_2", "best_access_path": { "considered_access_paths": [ { @@ -5766,9 +5762,9 @@ t_outer_2.a in 
(select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_outer_1", "t_inner_1", "t_inner_4", - "t_outer_2" + "t_inner_2" ], - "table": "t_inner_2", + "table": "t_outer_2", "best_access_path": { "considered_access_paths": [ { @@ -5795,7 +5791,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_outer_1", "t_inner_1", "t_inner_4", - "t_outer_2" + "t_inner_2" ], "table": "t_inner_3", "best_access_path": { @@ -5823,7 +5819,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { }, { "plan_prefix": ["t_outer_1", "t_inner_1", "t_inner_4"], - "table": "t_inner_2", + "table": "t_outer_2", "best_access_path": { "considered_access_paths": [ { @@ -5896,7 +5892,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "rest_of_plan": [ { "plan_prefix": ["t_outer_1", "t_inner_1", "t_inner_3"], - "table": "t_outer_2", + "table": "t_inner_2", "best_access_path": { "considered_access_paths": [ { @@ -5922,27 +5918,27 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_outer_1", "t_inner_1", "t_inner_3", - "t_outer_2" + "t_inner_2" ], - "table": "t_inner_4", + "table": "t_outer_2", "best_access_path": { "considered_access_paths": [ { "access_type": "scan", - "resulting_rows": 3, - "cost": 2.005126953, + "resulting_rows": 9, + "cost": 2.015380859, "chosen": true } ], "chosen_access_method": { "type": "scan", - "records": 3, - "cost": 2.005126953, + "records": 9, + "cost": 2.015380859, "uses_join_buffering": true } }, - "rows_for_plan": 2187, - "cost_for_plan": 611.8461426, + "rows_for_plan": 6561, + "cost_for_plan": 1486.656396, "semijoin_strategy_choice": [], "pruned_by_cost": true }, @@ -5951,32 +5947,56 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_outer_1", "t_inner_1", "t_inner_3", - "t_outer_2" + "t_inner_2" ], - "table": "t_inner_2", + "table": "t_inner_4", "best_access_path": { "considered_access_paths": [ { "access_type": "scan", - "resulting_rows": 9, - 
"cost": 2.015380859, + "resulting_rows": 3, + "cost": 2.005126953, "chosen": true } ], "chosen_access_method": { "type": "scan", - "records": 9, - "cost": 2.015380859, + "records": 3, + "cost": 2.005126953, "uses_join_buffering": true } }, - "rows_for_plan": 6561, - "cost_for_plan": 1486.656396, + "rows_for_plan": 2187, + "cost_for_plan": 611.8461426, "semijoin_strategy_choice": [], "pruned_by_cost": true } ] }, + { + "plan_prefix": ["t_outer_1", "t_inner_1", "t_inner_3"], + "table": "t_outer_2", + "best_access_path": { + "considered_access_paths": [ + { + "access_type": "scan", + "resulting_rows": 9, + "cost": 2.015380859, + "chosen": true + } + ], + "chosen_access_method": { + "type": "scan", + "records": 9, + "cost": 2.015380859, + "uses_join_buffering": true + } + }, + "rows_for_plan": 729, + "cost_for_plan": 172.4410156, + "semijoin_strategy_choice": [], + "pruned_by_heuristic": true + }, { "plan_prefix": ["t_outer_1", "t_inner_1", "t_inner_3"], "table": "t_inner_4", @@ -6007,7 +6027,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_inner_3", "t_inner_4" ], - "table": "t_outer_2", + "table": "t_inner_2", "best_access_path": { "considered_access_paths": [ { @@ -6036,7 +6056,7 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "t_inner_3", "t_inner_4" ], - "table": "t_inner_2", + "table": "t_outer_2", "best_access_path": { "considered_access_paths": [ { @@ -6059,30 +6079,6 @@ t_outer_2.a in (select t_inner_3.a from t2 t_inner_3, t1 t_inner_4) { "pruned_by_cost": true } ] - }, - { - "plan_prefix": ["t_outer_1", "t_inner_1", "t_inner_3"], - "table": "t_inner_2", - "best_access_path": { - "considered_access_paths": [ - { - "access_type": "scan", - "resulting_rows": 9, - "cost": 2.015380859, - "chosen": true - } - ], - "chosen_access_method": { - "type": "scan", - "records": 9, - "cost": 2.015380859, - "uses_join_buffering": true - } - }, - "rows_for_plan": 729, - "cost_for_plan": 172.4410156, - 
"semijoin_strategy_choice": [], - "pruned_by_heuristic": true } ] } diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result index 40ab309fffd..2cd3cbdd8eb 100644 --- a/mysql-test/main/selectivity.result +++ b/mysql-test/main/selectivity.result @@ -326,8 +326,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort -1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 +1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index Warnings: @@ -360,8 +360,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort -1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 +1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL i_l_orderkey_quantity 13 NULL 6005 100.00 Using index Warnings: diff --git a/mysql-test/main/subselect3.result b/mysql-test/main/subselect3.result index b3758a66a60..28187e0ffdd 100644 --- 
a/mysql-test/main/subselect3.result +++ b/mysql-test/main/subselect3.result @@ -1160,8 +1160,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY A ALL NULL NULL NULL NULL 10 1 PRIMARY B ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join) 1 PRIMARY E ALL NULL NULL NULL NULL 5 Start temporary; Using join buffer (flat, BNL join) -1 PRIMARY D ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) -1 PRIMARY C ALL NULL NULL NULL NULL 10 Using where; End temporary; Using join buffer (flat, BNL join) +1 PRIMARY C ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +1 PRIMARY D ALL NULL NULL NULL NULL 10 Using where; End temporary; Using join buffer (flat, BNL join) flush status; select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E where a+1 < 10000 + A.a + B.a +C.a+D.a); count(*) diff --git a/mysql-test/main/subselect3_jcl6.result b/mysql-test/main/subselect3_jcl6.result index ec799c07003..9df821e07dc 100644 --- a/mysql-test/main/subselect3_jcl6.result +++ b/mysql-test/main/subselect3_jcl6.result @@ -1163,8 +1163,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY A ALL NULL NULL NULL NULL 10 1 PRIMARY B ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join) 1 PRIMARY E ALL NULL NULL NULL NULL 5 Using where; Start temporary; Using join buffer (incremental, BNL join) -1 PRIMARY D hash_ALL NULL #hash#$hj 5 test.E.a 10 Using where; Using join buffer (incremental, BNLH join) -1 PRIMARY C ALL NULL NULL NULL NULL 10 Using where; End temporary; Using join buffer (incremental, BNL join) +1 PRIMARY C ALL NULL NULL NULL NULL 10 Using where; Using join buffer (incremental, BNL join) +1 PRIMARY D hash_ALL NULL #hash#$hj 5 test.E.a 10 Using where; End temporary; Using join buffer (incremental, BNLH join) flush status; select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E where a+1 < 10000 + A.a + B.a +C.a+D.a); count(*) 
diff --git a/mysql-test/main/subselect_sj.result b/mysql-test/main/subselect_sj.result index e9a484bbcbf..9febf3d4e5f 100644 --- a/mysql-test/main/subselect_sj.result +++ b/mysql-test/main/subselect_sj.result @@ -2500,8 +2500,8 @@ WHERE t1.a = t2.a AND t2.a IN (SELECT b FROM t3 STRAIGHT_JOIN t4); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t3 system NULL NULL NULL NULL 1 1 PRIMARY ALL distinct_key NULL NULL NULL 1 -1 PRIMARY t2 ref a a 5 const 1 Using index -1 PRIMARY t1 ref a a 5 func 1 Using index +1 PRIMARY t1 ref a a 5 const 1 Using index +1 PRIMARY t2 ref a a 5 func 1 Using index 2 MATERIALIZED t4 ALL NULL NULL NULL NULL 0 SELECT * FROM t1, t2 WHERE t1.a = t2.a AND t2.a IN (SELECT b FROM t3 STRAIGHT_JOIN t4); diff --git a/mysql-test/main/subselect_sj2.result b/mysql-test/main/subselect_sj2.result index cdf9707dcbd..db6393b909d 100644 --- a/mysql-test/main/subselect_sj2.result +++ b/mysql-test/main/subselect_sj2.result @@ -1129,8 +1129,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY alias3 ALL PRIMARY NULL NULL NULL # Using where 1 PRIMARY alias4 ref PRIMARY,c c 4 test.alias3.d # Using index 1 PRIMARY alias5 eq_ref PRIMARY PRIMARY 4 test.alias4.b # Using where; FirstMatch(alias3) -1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) 1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) +1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) SELECT COUNT(*) FROM t1 AS alias1, t1 AS alias2, t2 AS alias3 WHERE alias3.d IN ( SELECT alias4.c FROM t2 AS alias4, t2 AS alias5 @@ -1150,8 +1150,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY alias3 ALL PRIMARY NULL NULL NULL # Using where 1 PRIMARY alias4 ref PRIMARY,c c 4 test.alias3.d # Using index 1 PRIMARY alias5 eq_ref PRIMARY PRIMARY 4 test.alias4.b # Using where; FirstMatch(alias3) -1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer 
(flat, BNL join) 1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) +1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) SELECT COUNT(*) FROM t1 AS alias1, t1 AS alias2, t2 AS alias3 WHERE alias3.d IN ( SELECT alias4.c FROM t2 AS alias4, t2 AS alias5 diff --git a/mysql-test/main/subselect_sj2_jcl6.result b/mysql-test/main/subselect_sj2_jcl6.result index 84317467e8a..b2ffb033788 100644 --- a/mysql-test/main/subselect_sj2_jcl6.result +++ b/mysql-test/main/subselect_sj2_jcl6.result @@ -1142,8 +1142,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY alias3 ALL PRIMARY NULL NULL NULL # Using where 1 PRIMARY alias4 ref PRIMARY,c c 4 test.alias3.d # Using index 1 PRIMARY alias5 eq_ref PRIMARY PRIMARY 4 test.alias4.b # Using where; FirstMatch(alias3) -1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) -1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (incremental, BNL join) +1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) +1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (incremental, BNL join) SELECT COUNT(*) FROM t1 AS alias1, t1 AS alias2, t2 AS alias3 WHERE alias3.d IN ( SELECT alias4.c FROM t2 AS alias4, t2 AS alias5 @@ -1163,8 +1163,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY alias3 ALL PRIMARY NULL NULL NULL # Using where 1 PRIMARY alias4 ref PRIMARY,c c 4 test.alias3.d # Using index 1 PRIMARY alias5 eq_ref PRIMARY PRIMARY 4 test.alias4.b # Using where; FirstMatch(alias3) -1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) -1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (incremental, BNL join) +1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) +1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (incremental, BNL join) SELECT COUNT(*) FROM t1 AS alias1, t1 AS alias2, t2 AS alias3 WHERE 
alias3.d IN ( SELECT alias4.c FROM t2 AS alias4, t2 AS alias5 diff --git a/mysql-test/main/subselect_sj2_mat.result b/mysql-test/main/subselect_sj2_mat.result index 54286f1fa82..b5eaa258410 100644 --- a/mysql-test/main/subselect_sj2_mat.result +++ b/mysql-test/main/subselect_sj2_mat.result @@ -1131,8 +1131,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY alias3 ALL PRIMARY NULL NULL NULL # Using where 1 PRIMARY alias4 ref PRIMARY,c c 4 test.alias3.d # Using index 1 PRIMARY alias5 eq_ref PRIMARY PRIMARY 4 test.alias4.b # Using where; FirstMatch(alias3) -1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) 1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) +1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) SELECT COUNT(*) FROM t1 AS alias1, t1 AS alias2, t2 AS alias3 WHERE alias3.d IN ( SELECT alias4.c FROM t2 AS alias4, t2 AS alias5 @@ -1152,8 +1152,8 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY alias3 ALL PRIMARY NULL NULL NULL # Using where 1 PRIMARY alias4 ref PRIMARY,c c 4 test.alias3.d # Using index 1 PRIMARY alias5 eq_ref PRIMARY PRIMARY 4 test.alias4.b # Using where; FirstMatch(alias3) -1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) 1 PRIMARY alias1 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) +1 PRIMARY alias2 ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join) SELECT COUNT(*) FROM t1 AS alias1, t1 AS alias2, t2 AS alias3 WHERE alias3.d IN ( SELECT alias4.c FROM t2 AS alias4, t2 AS alias5 @@ -1933,19 +1933,19 @@ AND t3.id_product IN (SELECT id_product FROM t2 t2_4 WHERE t2_4.id_t2 = 34 OR t2 AND t3.id_product IN (SELECT id_product FROM t2 t2_5 WHERE t2_5.id_t2 = 29 OR t2_5.id_t2 = 28 OR t2_5.id_t2 = 26); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t3 index PRIMARY PRIMARY 4 NULL 18 Using index -1 PRIMARY eq_ref distinct_key 
distinct_key 4 func 1 Using where +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where 1 PRIMARY t5 ALL NULL NULL NULL NULL 18 Using where; Using join buffer (flat, BNL join) -1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where -1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where 1 PRIMARY t4 eq_ref PRIMARY PRIMARY 8 test.t3.id_product,const 1 Using where; Using index -1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where 1 PRIMARY t1 index NULL PRIMARY 8 NULL 73 Using where; Using index; Using join buffer (flat, BNL join) +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where +1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 Using where +5 MATERIALIZED t2_4 range id_t2,id_product id_t2 5 NULL 18 Using index condition; Using where +6 MATERIALIZED t2_5 range id_t2,id_product id_t2 5 NULL 31 Using index condition; Using where 3 MATERIALIZED t2_2 ref id_t2,id_product id_t2 5 const 12 4 MATERIALIZED t2_3 range id_t2,id_product id_t2 5 NULL 33 Using index condition; Using where -6 MATERIALIZED t2_5 range id_t2,id_product id_t2 5 NULL 31 Using index condition; Using where 2 MATERIALIZED t2_1 ALL id_t2,id_product NULL NULL NULL 223 Using where -5 MATERIALIZED t2_4 range id_t2,id_product id_t2 5 NULL 18 Using index condition; Using where set optimizer_switch='rowid_filter=default'; drop table t1,t2,t3,t4,t5; set global innodb_stats_persistent= @innodb_stats_persistent_save; diff --git a/mysql-test/main/subselect_sj_jcl6.result b/mysql-test/main/subselect_sj_jcl6.result index c485b5e5f39..9eac5c65f82 100644 --- a/mysql-test/main/subselect_sj_jcl6.result +++ b/mysql-test/main/subselect_sj_jcl6.result @@ -2511,8 +2511,8 @@ WHERE t1.a = t2.a AND t2.a IN (SELECT b FROM t3 STRAIGHT_JOIN t4); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t3 system NULL NULL NULL 
NULL 1 1 PRIMARY ALL distinct_key NULL NULL NULL 1 -1 PRIMARY t2 ref a a 5 const 1 Using index -1 PRIMARY t1 ref a a 5 func 1 Using index +1 PRIMARY t1 ref a a 5 const 1 Using index +1 PRIMARY t2 ref a a 5 func 1 Using index 2 MATERIALIZED t4 ALL NULL NULL NULL NULL 0 SELECT * FROM t1, t2 WHERE t1.a = t2.a AND t2.a IN (SELECT b FROM t3 STRAIGHT_JOIN t4); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index a059986d611..32f5e41e50d 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -9176,8 +9176,13 @@ greedy_search(JOIN *join, while (pos && best_table != pos) pos= join->best_ref[++best_idx]; DBUG_ASSERT((pos != NULL)); // should always find 'best_table' - /* move 'best_table' at the first free position in the array of joins */ - swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]); + /* + move 'best_table' at the first free position in the array of joins, + keeping the sorted table order intact + */ + memmove(join->best_ref + idx + 1, join->best_ref + idx, + sizeof(JOIN_TAB*) * (best_idx - idx)); + join->best_ref[idx]= best_table; /* compute the cost of the new plan extended with 'best_table' */ record_count= COST_MULT(record_count, join->positions[idx].records_read); @@ -9924,7 +9929,7 @@ best_extension_by_limited_search(JOIN *join, 'join' is a partial plan with lower cost than the best plan so far, so continue expanding it further with the tables in 'remaining_tables'. */ - JOIN_TAB *s; + JOIN_TAB *s, **pos; double best_record_count= DBL_MAX; double best_read_time= DBL_MAX; bool disable_jbuf= join->thd->variables.join_cache_level == 0; @@ -9944,7 +9949,7 @@ best_extension_by_limited_search(JOIN *join, DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time, "part_plan");); - /* + /* If we are searching for the execution plan of a materialized semi-join nest then allowed_tables contains bits only for the tables from this nest. 
*/ @@ -9952,11 +9957,13 @@ best_extension_by_limited_search(JOIN *join, if (join->emb_sjm_nest) allowed_tables= join->emb_sjm_nest->sj_inner_tables & ~join->const_table_map; - for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++) + for (pos= join->best_ref + idx ; (s= *pos) ; pos++) { table_map real_table_bit= s->table->map; DBUG_ASSERT(remaining_tables & real_table_bit); + swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); + if ((allowed_tables & real_table_bit) && !(remaining_tables & s->dependent) && !check_interleaving_with_nj(s)) @@ -10069,7 +10076,6 @@ best_extension_by_limited_search(JOIN *join, allowed_tables) { /* Recursively expand the current partial plan */ - swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); Json_writer_array trace_rest(thd, "rest_of_plan"); best_res= best_extension_by_limited_search(join, @@ -10082,8 +10088,7 @@ best_extension_by_limited_search(JOIN *join, prune_level, use_cond_selectivity); if ((int) best_res < (int) SEARCH_OK) - DBUG_RETURN(best_res); // Abort - swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); + goto end; // Return best_res if (best_res == SEARCH_FOUND_EDGE && check_if_edge_table(join->positions+ idx, pushdown_cond_selectivity) != @@ -10128,11 +10133,27 @@ best_extension_by_limited_search(JOIN *join, if (best_res == SEARCH_FOUND_EDGE) { trace_one_table.add("pruned_by_hanging_leaf", true); - DBUG_RETURN(best_res); + goto end; } } } - DBUG_RETURN(SEARCH_OK); + best_res= SEARCH_OK; + +end: + /* Restore original table order */ + if (!*pos) + pos--; // Revert last pos++ in for loop + if (pos != join->best_ref + idx) + { + JOIN_TAB *tmp= join->best_ref[idx]; + uint elements= (uint) (pos - (join->best_ref + idx)); + + memmove((void*) (join->best_ref + idx), + (void*) (join->best_ref + idx + 1), + elements * sizeof(JOIN_TAB*)); + *pos= tmp; + } + DBUG_RETURN(best_res); } -- cgit v1.2.1 From 432a4ebe5cd2ebf4d0fad79092e82e5d1a9f53ba Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Wed, 18 May 2022 
22:17:32 +0300 Subject: Improve table pruning in optimizer with up to date key_dependent map Part of: MDEV-28073 Slow query performance in MariaDB when using many tables s->key_dependent has a list of tables that are compared with key fields in the current table. However, it does not take into account whether a key field could be resolved by another table. This is because MariaDB expands 'join_tab->keyuse' to include all generated comparisons. For example: SELECT * from t1,t2,t3 where t1.key=t2.key and t2.key=t3.key In this case keyuse for t1 includes t2.key and t3.key and key_dependent contains 't2.map | t3.map' If best_extension_by_limited_search() considers the order t2,t1 then t1's key is fully defined, but we cannot prune any plans because s->key_dependent indicates that t3 is still needed. Fixed by calculating in best_access_path() the current key_dependent map of tables that are needed to satisfy all keys. This allows us to prune more bad plans earlier as soon as all keys can be used. We also set key_dependent to 0 if we found an EQ_REF key, as this is an optimal key for the table and there is no reason to check more keys. 
--- mysql-test/main/derived_cond_pushdown.result | 135 ++++++++++++++------------- mysql-test/main/join_nested.result | 4 +- mysql-test/main/join_nested_jcl6.result | 4 +- mysql-test/main/selectivity_innodb.result | 4 +- sql/sql_select.cc | 67 +++++++++++-- sql/sql_select.h | 4 + 6 files changed, 135 insertions(+), 83 deletions(-) diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result index d15f368e08d..5b0cc4a08f5 100644 --- a/mysql-test/main/derived_cond_pushdown.result +++ b/mysql-test/main/derived_cond_pushdown.result @@ -9684,11 +9684,22 @@ EXPLAIN "query_block": { "select_id": 1, "table": { - "table_name": "", + "table_name": "t1", "access_type": "ALL", "rows": 2, "filtered": 100, - "attached_condition": "1 in (0,dt1.a)", + "attached_condition": "1 in (0,t1.a) and t1.a is not null" + }, + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "5", + "used_key_parts": ["a"], + "ref": ["test.t1.a"], + "rows": 2, + "filtered": 100, "materialized": { "query_block": { "select_id": 2, @@ -9706,18 +9717,6 @@ EXPLAIN } } } - }, - "block-nl-join": { - "table": { - "table_name": "t1", - "access_type": "ALL", - "rows": 2, - "filtered": 100 - }, - "buffer_type": "flat", - "buffer_size": "65", - "join_type": "BNL", - "attached_condition": "t1.a = dt1.a" } } } @@ -9743,11 +9742,22 @@ EXPLAIN "query_block": { "select_id": 1, "table": { - "table_name": "", + "table_name": "t1", "access_type": "ALL", "rows": 2, "filtered": 100, - "attached_condition": "dt.a in (1,dt.a)", + "attached_condition": "t1.a in (1,t1.a) and t1.a is not null" + }, + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "5", + "used_key_parts": ["a"], + "ref": ["test.t1.a"], + "rows": 2, + "filtered": 100, "materialized": { "query_block": { "select_id": 2, @@ -9765,18 +9775,6 @@ EXPLAIN } } } - }, - "block-nl-join": { - "table": { 
- "table_name": "t1", - "access_type": "ALL", - "rows": 2, - "filtered": 100 - }, - "buffer_type": "flat", - "buffer_size": "119", - "join_type": "BNL", - "attached_condition": "t1.a = dt.a" } } } @@ -10376,11 +10374,22 @@ EXPLAIN "query_block": { "select_id": 1, "table": { - "table_name": "", + "table_name": "t1", "access_type": "ALL", "rows": 2, "filtered": 100, - "attached_condition": "t.f2 < 2", + "attached_condition": "t1.f2 < 2 and t1.f2 is not null" + }, + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "5", + "used_key_parts": ["f2"], + "ref": ["test.t1.f2"], + "rows": 2, + "filtered": 100, "materialized": { "query_block": { "select_id": 3, @@ -10393,13 +10402,6 @@ EXPLAIN } } } - }, - "table": { - "table_name": "t1", - "access_type": "ALL", - "rows": 2, - "filtered": 100, - "attached_condition": "t1.f2 = t.f2" } } } @@ -10417,11 +10419,22 @@ EXPLAIN "query_block": { "select_id": 1, "table": { - "table_name": "", + "table_name": "t1", "access_type": "ALL", "rows": 2, "filtered": 100, - "attached_condition": "t.f2 < 2", + "attached_condition": "t1.f2 < 2 and t1.f2 is not null" + }, + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "5", + "used_key_parts": ["f2"], + "ref": ["test.t1.f2"], + "rows": 1, + "filtered": 100, "materialized": { "query_block": { "select_id": 3, @@ -10436,18 +10449,6 @@ EXPLAIN } } } - }, - "block-nl-join": { - "table": { - "table_name": "t1", - "access_type": "ALL", - "rows": 2, - "filtered": 100 - }, - "buffer_type": "flat", - "buffer_size": "65", - "join_type": "BNL", - "attached_condition": "t1.f2 = t.f2" } } } @@ -14388,8 +14389,8 @@ a b c a b c 3 21 500 3 21 231 explain select * from v1,t2 where (v1.b=t2.b) and (v1.a<4); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY ALL NULL NULL NULL NULL 3 Using where -1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where; Using 
join buffer (flat, BNL join) +1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where +1 PRIMARY ref key0 key0 5 test.t2.b 2 Using where 2 DERIVED t3 range i1 i1 5 NULL 2 Using index condition 3 UNION t3 range i1 i1 5 NULL 1 Using index condition NULL UNION RESULT ALL NULL NULL NULL NULL NULL @@ -14399,9 +14400,21 @@ EXPLAIN "query_block": { "select_id": 1, "table": { - "table_name": "", + "table_name": "t2", "access_type": "ALL", - "rows": 3, + "rows": 9, + "filtered": 100, + "attached_condition": "t2.b is not null" + }, + "table": { + "table_name": "", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "5", + "used_key_parts": ["b"], + "ref": ["test.t2.b"], + "rows": 2, "filtered": 100, "attached_condition": "v1.a < 4", "materialized": { @@ -14447,18 +14460,6 @@ EXPLAIN } } } - }, - "block-nl-join": { - "table": { - "table_name": "t2", - "access_type": "ALL", - "rows": 9, - "filtered": 100 - }, - "buffer_type": "flat", - "buffer_size": "173", - "join_type": "BNL", - "attached_condition": "t2.b = v1.b" } } } diff --git a/mysql-test/main/join_nested.result b/mysql-test/main/join_nested.result index 0c7a1b48940..5f26b03e0d1 100644 --- a/mysql-test/main/join_nested.result +++ b/mysql-test/main/join_nested.result @@ -1476,9 +1476,9 @@ join t5 on t5.a=t3.b) on t3.a=t2.b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL NULL NULL NULL NULL X 1 SIMPLE t3 ref a a 5 test.t2.b X Using where -1 SIMPLE t5 ref a a 5 test.t3.b X -1 SIMPLE t4 ref a a 5 test.t5.a X Using where +1 SIMPLE t4 ref a a 5 test.t3.b X Using where 1 SIMPLE t6 ref a a 5 test.t4.b X +1 SIMPLE t5 ref a a 5 test.t3.b X drop table t0, t1, t2, t3, t4, t5, t6, t7; create table t1 (a int); insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); diff --git a/mysql-test/main/join_nested_jcl6.result b/mysql-test/main/join_nested_jcl6.result index 31f5c794071..5db6d030965 100644 --- a/mysql-test/main/join_nested_jcl6.result +++ 
b/mysql-test/main/join_nested_jcl6.result @@ -1485,9 +1485,9 @@ join t5 on t5.a=t3.b) on t3.a=t2.b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 ALL NULL NULL NULL NULL X 1 SIMPLE t3 ref a a 5 test.t2.b X Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan -1 SIMPLE t5 ref a a 5 test.t3.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan -1 SIMPLE t4 ref a a 5 test.t5.a X Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan +1 SIMPLE t4 ref a a 5 test.t3.b X Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan 1 SIMPLE t6 ref a a 5 test.t4.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan +1 SIMPLE t5 ref a a 5 test.t3.b X Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan drop table t0, t1, t2, t3, t4, t5, t6, t7; create table t1 (a int); insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result index 5457db21436..07cdf15163c 100644 --- a/mysql-test/main/selectivity_innodb.result +++ b/mysql-test/main/selectivity_innodb.result @@ -331,8 +331,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort -1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 +1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL PRIMARY 8 
NULL 6005 100.00 Warnings: @@ -365,8 +365,8 @@ group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY orders ALL PRIMARY,i_o_custkey NULL NULL NULL 1500 100.00 Using where; Using temporary; Using filesort -1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY customer eq_ref PRIMARY PRIMARY 4 dbt3_s001.orders.o_custkey 1 100.00 +1 PRIMARY eq_ref distinct_key distinct_key 4 dbt3_s001.orders.o_orderkey 1 100.00 1 PRIMARY lineitem ref PRIMARY,i_l_orderkey,i_l_orderkey_quantity i_l_orderkey_quantity 4 dbt3_s001.orders.o_orderkey 4 100.00 Using index 2 MATERIALIZED lineitem index NULL PRIMARY 8 NULL 6005 100.00 Warnings: diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 32f5e41e50d..12a1b74e1ec 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -392,6 +392,7 @@ POSITION::POSITION() ref_depend_map= dups_producing_tables= 0; inner_tables_handled_with_other_sjs= 0; type= JT_UNKNOWN; + key_dependent= 0; dups_weedout_picker.set_empty(); firstmatch_picker.set_empty(); loosescan_picker.set_empty(); @@ -6291,7 +6292,11 @@ add_key_field(JOIN *join, Field IN ... */ if (field->flags & PART_KEY_FLAG) - stat[0].key_dependent|=used_tables; + { + stat[0].key_dependent|= used_tables; + if (field->key_start.bits_set()) + stat[0].key_start_dependent= 1; + } bool is_const=1; for (uint i=0; ielements-1 ; i++,use++) { if (!use->is_for_hash_join()) @@ -7699,6 +7705,13 @@ best_access_path(JOIN *join, double best_time= DBL_MAX; double records= DBL_MAX; table_map best_ref_depends_map= 0; + /* + key_dependent is 0 if all key parts could be used or if there was an + EQ_REF table found (which uses all key parts). In other words, we cannot + find a better key for the table even if remaining_tables is reduced. + Otherwise it's a bitmap of tables that could improve key usage. 
+ */ + table_map key_dependent= 0; Range_rowid_filter_cost_info *best_filter= 0; double tmp; ha_rows rec; @@ -7750,6 +7763,8 @@ best_access_path(JOIN *join, key_part_map const_part= 0; /* The or-null keypart in ref-or-null access: */ key_part_map ref_or_null_part= 0; + key_part_map all_parts= 0; + if (is_hash_join_key_no(key)) { /* @@ -7781,15 +7796,16 @@ best_access_path(JOIN *join, do /* For each keypart */ { uint keypart= keyuse->keypart; - table_map best_part_found_ref= 0; + table_map best_part_found_ref= 0, key_parts_dependent= 0; double best_prev_record_reads= DBL_MAX; - + do /* For each way to access the keypart */ { /* if 1. expression doesn't refer to forward tables 2. we won't get two ref-or-null's */ + all_parts|= keyuse->keypart_map; if (!(remaining_tables & keyuse->used_tables) && (!keyuse->validity_ref || *keyuse->validity_ref) && s->access_from_tables_is_allowed(keyuse->used_tables, @@ -7798,6 +7814,7 @@ best_access_path(JOIN *join, KEY_OPTIMIZE_REF_OR_NULL))) { found_part|= keyuse->keypart_map; + key_parts_dependent= 0; if (!(keyuse->used_tables & ~join->const_table_map)) const_part|= keyuse->keypart_map; @@ -7820,10 +7837,16 @@ best_access_path(JOIN *join, if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) ref_or_null_part |= keyuse->keypart_map; } + else if (!(found_part & keyuse->keypart_map)) + key_parts_dependent|= keyuse->used_tables; + loose_scan_opt.add_keyuse(remaining_tables, keyuse); keyuse++; } while (keyuse->table == table && keyuse->key == key && keyuse->keypart == keypart); + /* If we found a usable key, remember the dependent tables */ + if (all_parts & 1) + key_dependent|= key_parts_dependent; found_ref|= best_part_found_ref; } while (keyuse->table == table && keyuse->key == key); @@ -8210,6 +8233,24 @@ best_access_path(JOIN *join, } /* for each key */ records= best_records; } + else + { + /* + No usable keys found. 
However, there may still be an option to use + "Range checked for each record" when all depending tables has + been read. s->key_dependent tells us which tables these could be and + s->key_start_dependent tells us if a first key part was used. + s->key_dependent may include more tables than could be used, + but this is ok as not having any usable keys is a rare thing and + the performance penalty for extra table bits is that + best_extension_by_limited_search() would not be able to prune tables + earlier. + Example query: + SELECT * FROM t1,t2 where t1.key1=t2.key1 OR t2.key2<1 + */ + if (s->key_start_dependent) + key_dependent= s->key_dependent; + } /* If there is no key to access the table, but there is an equi-join @@ -8461,6 +8502,8 @@ best_access_path(JOIN *join, pos->use_join_buffer= best_uses_jbuf; pos->spl_plan= spl_plan; pos->range_rowid_filter_info= best_filter; + pos->key_dependent= (best_type == JT_EQ_REF ? (table_map) 0 : + key_dependent & remaining_tables); loose_scan_opt.save_to_position(s, loose_scan_pos); @@ -9401,10 +9444,7 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s, double sel= 1.0; COND_EQUAL *cond_equal= join->cond_equal; - if (!cond_equal || !cond_equal->current_level.elements) - return sel; - - if (!s->keyuse) + if (!cond_equal || !cond_equal->current_level.elements || !s->keyuse) return sel; Item_equal *item_equal; @@ -10028,11 +10068,18 @@ best_extension_by_limited_search(JOIN *join, (idx == join->const_tables && // 's' is the first table in the QEP s->table == join->sort_by_table)) { + /* + Store the current record count and cost as the best + possible cost at this level if the following holds: + - It's the lowest record number and cost so far + - There is no remaing table that could improve index usage + or we found an EQ_REF or REF key with less than 2 + matching records (good enough). 
+ */ if (best_record_count >= current_record_count && best_read_time >= current_read_time && - /* TODO: What is the reasoning behind this condition? */ - (!(s->key_dependent & allowed_tables & remaining_tables) || - join->positions[idx].records_read < 2.0)) + (!(position->key_dependent & allowed_tables) || + position->records_read < 2.0)) { best_record_count= current_record_count; best_read_time= current_read_time; diff --git a/sql/sql_select.h b/sql/sql_select.h index e65267558e1..7a72d0efe42 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -376,6 +376,8 @@ typedef struct st_join_table { uint used_null_fields; uint used_uneven_bit_fields; enum join_type type; + /* If first key part is used for any key in 'key_dependent' */ + bool key_start_dependent; bool cached_eq_ref_table,eq_ref_table; bool shortcut_for_distinct; bool sorted; @@ -958,6 +960,8 @@ public: /* If ref-based access is used: bitmap of tables this table depends on */ table_map ref_depend_map; + /* tables that may help best_access_path() to find a better key */ + table_map key_dependent; /* Bitmap of semi-join inner tables that are in the join prefix and for which there's no provision for how to eliminate semi-join duplicates -- cgit v1.2.1 From 31811cf81d2d82d0260322c8c1bf7b2e046a4712 Mon Sep 17 00:00:00 2001 From: Michael Widenius Date: Sun, 22 May 2022 20:46:03 +0300 Subject: Make join->key_dependent up to date for derived tables Main-author: Sergei Petrunia --- sql/sql_select.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 12a1b74e1ec..c71911f12cd 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7641,7 +7641,7 @@ double cost_for_index_read(const THD *thd, const TABLE *table, uint key, Adjust cost from table->quick_costs calculated by multi_range_read_info_const() to be comparable with cost_for_index_read() - This functions is needed because best_access_patch doesn't add + This functions is needed because 
best_access_path() doesn't add TIME_FOR_COMPARE to it's costs until very late. Preferably we should fix so that all costs are comparably. (All compared costs should include TIME_FOR_COMPARE for all found @@ -8251,6 +8251,9 @@ best_access_path(JOIN *join, if (s->key_start_dependent) key_dependent= s->key_dependent; } + /* Check that s->key_dependent contains all used_tables found in s->keyuse */ + key_dependent&= ~PSEUDO_TABLE_BITS; + DBUG_ASSERT((key_dependent & s->key_dependent) == key_dependent); /* If there is no key to access the table, but there is an equi-join @@ -12600,6 +12603,7 @@ bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys) (uchar *) &first_keyuse)) { + JOIN_TAB *tab; first_keyuse= save_first_keyuse; if (table->add_tmp_key(table->s->keys, parts, get_next_field_for_derived_key, @@ -12607,6 +12611,9 @@ bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys) FALSE)) return TRUE; table->reginfo.join_tab->keys.set_bit(table->s->keys); + tab= table->reginfo.join_tab; + for (uint i=0; i < parts; i++) + tab->key_dependent|= save_first_keyuse[i].used_tables; } else { -- cgit v1.2.1 From 1de18a836f1abbe76632337f9296f402bbdf4cfc Mon Sep 17 00:00:00 2001 From: Monty Date: Fri, 3 Jun 2022 19:08:30 +0300 Subject: Updated aria_dump_log - Print correct server version for header - Updated version number - One can now specify file name last (without -f) --- storage/maria/aria_dump_log.c | 10 +++++++--- storage/maria/ma_loghandler.c | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/storage/maria/aria_dump_log.c b/storage/maria/aria_dump_log.c index 17af368c424..e64c97fcda3 100644 --- a/storage/maria/aria_dump_log.c +++ b/storage/maria/aria_dump_log.c @@ -66,7 +66,7 @@ static struct my_option my_long_options[] = static void print_version(void) { - printf("%s Ver 1.0 for %s on %s\n", + printf("%s Ver 1.1 for %s on %s\n", my_progname_short, SYSTEM_TYPE, MACHINE_TYPE); } @@ -78,8 +78,9 @@ static void 
usage(void) puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,"); puts("and you are welcome to modify and redistribute it under the GPL license\n"); - puts("Dump content of aria log pages."); - printf("\nUsage: %s -f file OPTIONS\n", my_progname_short); + puts("Dump the raw content of aria log pages."); + puts("For a logical dump, use aria_read_log"); + printf("\nUsage: %s OPTIONS aria_log_file\n", my_progname_short); my_print_help(my_long_options); print_defaults("my", load_default_groups); my_print_variables(my_long_options); @@ -115,6 +116,9 @@ static void get_options(int *argc,char ***argv) if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) exit(ho_error); + if (opt_file == NULL && *argc == 1) + opt_file= **argv; + if (opt_file == NULL) { usage(); diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 86e7fa93eaa..710fde2801e 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -1219,12 +1219,12 @@ void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc, ptr+= 4; desc->mysql_version= uint4korr(ptr); ptr+= 4; - desc->server_id= uint4korr(ptr + 4); + desc->server_id= uint4korr(ptr); ptr+= 4; desc->page_size= uint2korr(ptr) + 1; ptr+= 2; desc->file_number= uint3korr(ptr); - ptr+=3; + ptr+= 3; desc->max_lsn= lsn_korr(ptr); } -- cgit v1.2.1 From 3d241eb948855dbe0688a04c8111cc78deac3c1c Mon Sep 17 00:00:00 2001 From: Monty Date: Mon, 6 Jun 2022 15:22:24 +0300 Subject: Improve error reporting in Aria This patch fixes the following issues in Aria error reporting in case of read errors & crashed tables: - Added the table name to most error messages, including in case of read errors or when encrypting/decrypting a table. The format for error messages was changed slightly to accommodate logging of errors from lower level routines. - If we got a read error from storage (hard disk, ssd, S3 etc) we only reported 'table is crashed'.
Now the error number from the storage is reported. - Added checking of read failure from records_in_range() - Calls to ma_set_fatal_error() did not inform the SQL level of errors (to not spam the user with multiple error messages). Now the first error message and any fatal error messages are reported to the user. --- mysql-test/main/grant_repair.result | 2 +- mysql-test/suite/maria/encrypt-no-key.result | 10 ++-- mysql-test/suite/maria/encrypt-no-key.test | 8 ++- mysql-test/suite/maria/maria-recover.result | 5 +- mysql-test/suite/maria/maria-recover.test | 6 +- mysql-test/suite/maria/rollback.result | 8 +-- mysql-test/suite/maria/rollback.test | 2 +- sql/multi_range_read.cc | 2 + storage/maria/ha_maria.cc | 1 + storage/maria/ma_bitmap.c | 5 ++ storage/maria/ma_blockrec.c | 45 ++++++++++----- storage/maria/ma_cache.c | 2 +- storage/maria/ma_crypt.c | 5 +- storage/maria/ma_delete.c | 12 ++-- storage/maria/ma_dynrec.c | 20 +++---- storage/maria/ma_extra.c | 7 ++- storage/maria/ma_info.c | 83 ++++++++++++++++++++++++++-- storage/maria/ma_key.c | 4 +- storage/maria/ma_key_recover.c | 15 +++-- storage/maria/ma_keycache.c | 2 +- storage/maria/ma_locking.c | 27 +-------- storage/maria/ma_open.c | 15 ++--- storage/maria/ma_packrec.c | 12 ++-- storage/maria/ma_page.c | 9 ++- storage/maria/ma_rkey.c | 2 +- storage/maria/ma_search.c | 28 ++++++---- storage/maria/ma_statrec.c | 2 +- storage/maria/ma_update.c | 10 ++-- storage/maria/ma_write.c | 15 ++--- storage/maria/maria_def.h | 22 ++------ 30 files changed, 235 insertions(+), 151 deletions(-) diff --git a/mysql-test/main/grant_repair.result b/mysql-test/main/grant_repair.result index d97bcc38a08..6ebe043e4d4 100644 --- a/mysql-test/main/grant_repair.result +++ b/mysql-test/main/grant_repair.result @@ -3,7 +3,7 @@ call mtr.add_suppression("mysql.user"); flush tables; flush privileges; Warnings: -Error 145 Table './mysql/user' is marked as crashed and should be repaired +Error 145 Got error '145 "Table was marked as crashed 
and should be repaired"' for './mysql/user' Warning 1034 12544 clients are using or haven't closed the table properly Note 1034 Table is fixed # switching back from mysql.user to mysql.global_priv diff --git a/mysql-test/suite/maria/encrypt-no-key.result b/mysql-test/suite/maria/encrypt-no-key.result index 6745670dfac..bdc8a79ff9a 100644 --- a/mysql-test/suite/maria/encrypt-no-key.result +++ b/mysql-test/suite/maria/encrypt-no-key.result @@ -1,15 +1,17 @@ -call mtr.add_suppression('Unknown key id 1. Can''t continue'); +call mtr.add_suppression('Unknown key id 1'); +call mtr.add_suppression('try to repair it'); +call mtr.add_suppression('Index is corrupted'); set global aria_encrypt_tables= 1; create table t1 (pk int primary key, a int, key(a)) engine=aria transactional=1; alter table t1 disable keys; insert into t1 values (1,1); alter table t1 enable keys; -ERROR HY000: Unknown key id 1. Can't continue! +ERROR HY000: Unknown key id 1 for ./test/t1. Can't continue! repair table t1 use_frm; Table Op Msg_type Msg_text test.t1 repair warning Number of rows changed from 0 to 1 -test.t1 repair Error Unknown key id 1. Can't continue! -test.t1 repair Error Unknown key id 1. Can't continue! +test.t1 repair Error Unknown key id 1 for ./test/t1. Can't continue! +test.t1 repair Error Unknown key id 1 for ./test/t1. Can't continue! test.t1 repair status OK drop table t1; set global aria_encrypt_tables= default; diff --git a/mysql-test/suite/maria/encrypt-no-key.test b/mysql-test/suite/maria/encrypt-no-key.test index 2d586c50695..eebc2a102d3 100644 --- a/mysql-test/suite/maria/encrypt-no-key.test +++ b/mysql-test/suite/maria/encrypt-no-key.test @@ -1,14 +1,18 @@ # # MDEV-18496 Crash when Aria encryption is enabled but plugin not available # -call mtr.add_suppression('Unknown key id 1. 
Can''t continue'); +call mtr.add_suppression('Unknown key id 1'); +call mtr.add_suppression('try to repair it'); +call mtr.add_suppression('Index is corrupted'); set global aria_encrypt_tables= 1; create table t1 (pk int primary key, a int, key(a)) engine=aria transactional=1; alter table t1 disable keys; insert into t1 values (1,1); -error 192; +--replace_result \\ / +--error 192 alter table t1 enable keys; +--replace_result \\ / repair table t1 use_frm; drop table t1; set global aria_encrypt_tables= default; diff --git a/mysql-test/suite/maria/maria-recover.result b/mysql-test/suite/maria/maria-recover.result index 8a33307b2b0..788cd7eaf5a 100644 --- a/mysql-test/suite/maria/maria-recover.result +++ b/mysql-test/suite/maria/maria-recover.result @@ -26,9 +26,10 @@ a ThursdayMorningsMarket ThursdayMorningsMarketb Warnings: -Error 145 t_corrupted2' is marked as crashed and should be repaired +Error 145 Got error '145 "Table was marked as crashed and should be repaired"' for './mysqltest/t_corrupted2' Warning 1034 1 client is using or hasn't closed the table properly -Error 1034 Wrong base information on indexpage at page: 1 +Error 176 Got error '176 "Read page with wrong checksum"' for './mysqltest/t_corrupted2.MAI' +Error 1034 Can't read indexpage from page: 1, error: 176 select * from t_corrupted2; a ThursdayMorningsMarket diff --git a/mysql-test/suite/maria/maria-recover.test b/mysql-test/suite/maria/maria-recover.test index 36fa41d9adb..cea185e7ab5 100644 --- a/mysql-test/suite/maria/maria-recover.test +++ b/mysql-test/suite/maria/maria-recover.test @@ -20,8 +20,8 @@ select count(*) from mysql.proc; # account for Unix and Windows variation. 
call mtr.add_suppression("Checking table: '\\..mysqltest.t_corrupted2'"); call mtr.add_suppression("Recovering table: '\\..mysqltest.t_corrupted2'"); -call mtr.add_suppression("Table '\\..mysqltest.t_corrupted2' is marked as crashed and should be repaired"); -call mtr.add_suppression("Table 't_corrupted2' is marked as crashed and should be repaired"); +call mtr.add_suppression("Table was marked as crashed and should be repaired"); +call mtr.add_suppression("Read page with wrong checksum"); let $def_checkinterval=`select @@global.aria_checkpoint_interval`; @@ -78,7 +78,7 @@ perl; syswrite (FILE, $whatever) or die; close FILE; EOF -replace_regex /Table.*t_corrupted2/t_corrupted2/ ; +--replace_result \\ / --enable_prepare_warnings select * from t_corrupted2; # should show corruption and repair messages --disable_prepare_warnings diff --git a/mysql-test/suite/maria/rollback.result b/mysql-test/suite/maria/rollback.result index fd2e012805a..959f596edf2 100644 --- a/mysql-test/suite/maria/rollback.result +++ b/mysql-test/suite/maria/rollback.result @@ -1,4 +1,4 @@ -call mtr.add_suppression("Table '.*' is marked as crashed and should be repaired"); +call mtr.add_suppression("Table was marked as crashed"); call mtr.add_suppression("Checking table: .*"); create table t1 (a int primary key auto_increment, b int) engine=aria transactional= 1; create table t2 (a int primary key auto_increment, b int) engine=aria transactional= 0; @@ -54,7 +54,7 @@ a b 10 11 11 12 Warnings: -Error 145 Table './test/t2' is marked as crashed and should be repaired +Error 145 Got error '145 "Table was marked as crashed and should be repaired"' for './test/t2' Warning 1034 1 client is using or hasn't closed the table properly Note 1034 Table is fixed insert into t1 (b) values (100),(200); @@ -92,7 +92,7 @@ NEXT VALUE for s1 seq 11 3 12 4 Warnings: -Error 145 Table './test/s1' is marked as crashed and should be repaired +Error 145 Got error '145 "Table was marked as crashed and should be repaired"' 
for './test/s1' Warning 1034 1 client is using or hasn't closed the table properly Note 1034 Table is fixed drop table t1,t2; @@ -100,7 +100,7 @@ drop sequence s1; include/show_binlog_events.inc Log_name Pos Event_type Server_id End_log_pos Info master-bin.000001 # Gtid # # BEGIN GTID #-#-# -master-bin.000001 # Query # # use `mtr`; INSERT INTO test_suppressions (pattern) VALUES ( NAME_CONST('pattern',_latin1'Table \'.*\' is marked as crashed and should be repaired' COLLATE 'latin1_swedish_ci')) +master-bin.000001 # Query # # use `mtr`; INSERT INTO test_suppressions (pattern) VALUES ( NAME_CONST('pattern',_latin1'Table was marked as crashed' COLLATE 'latin1_swedish_ci')) master-bin.000001 # Query # # COMMIT master-bin.000001 # Gtid # # BEGIN GTID #-#-# master-bin.000001 # Query # # use `mtr`; INSERT INTO test_suppressions (pattern) VALUES ( NAME_CONST('pattern',_latin1'Checking table: .*' COLLATE 'latin1_swedish_ci')) diff --git a/mysql-test/suite/maria/rollback.test b/mysql-test/suite/maria/rollback.test index 40a96b9b05b..1469c26eaa2 100644 --- a/mysql-test/suite/maria/rollback.test +++ b/mysql-test/suite/maria/rollback.test @@ -3,7 +3,7 @@ # no-protocol doesn't print warnings about repaired tables --source include/no_protocol.inc -call mtr.add_suppression("Table '.*' is marked as crashed and should be repaired"); +call mtr.add_suppression("Table was marked as crashed"); call mtr.add_suppression("Checking table: .*"); # diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index c7c3079f28f..2701dac56c4 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -179,6 +179,8 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, { /* Can't scan one range => can't do MRR scan at all */ total_rows= HA_POS_ERROR; + if (thd->is_error()) + DBUG_RETURN(HA_POS_ERROR); break; } if (pages.first_page == UNUSED_PAGE_NO) diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index c180365f9e4..1e6680d30de 100644 --- 
a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -1477,6 +1477,7 @@ int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) maria_chk_init(param); param->thd= thd; param->op_name= "repair"; + file->error_count=0; /* The following can only be true if the table was marked as STATE_MOVED diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index 49604fa43f6..23135ff00a9 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -1082,6 +1082,10 @@ static my_bool _ma_read_bitmap_page(MARIA_HA *info, bitmap->used_size= (uint) ((data + 1) - end); DBUG_ASSERT(bitmap->used_size <= bitmap->total_size); } + else + { + _ma_set_fatal_error(info, my_errno); + } /* We can't check maria_bitmap_marker here as if the bitmap page previously had a true checksum and the user switched mode to not checksum @@ -3204,6 +3208,7 @@ _ma_bitmap_create_missing_into_pagecache(MARIA_SHARE *share, */ return FALSE; err: + _ma_set_fatal_error_with_share(share, my_errno); return TRUE; } diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 05040d962eb..baa777edcf0 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -921,7 +921,7 @@ static my_bool extend_area_on_page(MARIA_HA *info, DBUG_PRINT("error", ("Not enough space: " "length: %u request_length: %u", length, request_length)); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); /* Error in block */ } *empty_space= length; /* All space is here */ @@ -1788,7 +1788,10 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, page_link.changed= res->buff != 0; push_dynamic(&info->pinned_pages, (void*) &page_link); if (!page_link.changed) - goto crashed; + { + _ma_set_fatal_error(info, my_errno); + DBUG_RETURN(1); + } DBUG_ASSERT((uint) (res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type); @@ -1826,7 +1829,7 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, crashed: 
DBUG_ASSERT(!maria_assert_if_crashed_table); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */ + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); /* File crashed */ DBUG_RETURN(1); } @@ -1884,7 +1887,10 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, page_link.changed= buff != 0; push_dynamic(&info->pinned_pages, (void*) &page_link); if (!page_link.changed) /* Read error */ - goto err; + { + _ma_set_fatal_error(info, my_errno); + DBUG_RETURN(1); + } DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (uchar) page_type); if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != (uchar) page_type) @@ -1921,7 +1927,7 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, err: DBUG_ASSERT(!maria_assert_if_crashed_table); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */ + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); /* File crashed */ DBUG_RETURN(1); } @@ -2146,7 +2152,7 @@ static my_bool write_full_pages(MARIA_HA *info, { if (!--sub_blocks) { - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); } @@ -3475,7 +3481,7 @@ static my_bool write_block_record(MARIA_HA *info, crashed: DBUG_ASSERT(!maria_assert_if_crashed_table); /* Something was wrong with data on page */ - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); disk_err: /** @@ -3759,7 +3765,10 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, page_link.changed= buff != 0; push_dynamic(&info->pinned_pages, (void*) &page_link); if (!buff) + { + _ma_set_fatal_error(info, my_errno); goto err; + } org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET); rownr= ma_recordpos_to_dir_entry(record_pos); @@ -3947,7 +3956,10 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, page_link.changed= buff != 0; push_dynamic(&info->pinned_pages, (void*) &page_link); if (!buff) + { + _ma_set_fatal_error(info, 
my_errno); goto err; + } org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET); dir= dir_entry_pos(buff, block_size, rownr); @@ -3958,7 +3970,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, ("org_empty_size: %u head_length: %u length_on_page: %u", org_empty_size, (uint) cur_row->head_length, length_on_head_page)); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); goto err; } @@ -4200,7 +4212,10 @@ static my_bool delete_head_or_tail(MARIA_HA *info, page_link.changed= buff != 0; push_dynamic(&info->pinned_pages, (void*) &page_link); if (!buff) + { + _ma_set_fatal_error(info, my_errno); DBUG_RETURN(1); + } DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (head ? HEAD_PAGE : TAIL_PAGE)); @@ -4608,7 +4623,7 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, crashed: DBUG_ASSERT(!maria_assert_if_crashed_table); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_PRINT("error", ("wrong extent information")); DBUG_RETURN(0); } @@ -4754,7 +4769,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record, { /* File crashed */ DBUG_ASSERT(!maria_assert_if_crashed_table); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } if (!trnman_can_read_from(info->trn, cur_row->trid)) @@ -5042,7 +5057,7 @@ err: DBUG_ASSERT(!maria_assert_if_crashed_table); /* Something was wrong with data on record */ DBUG_PRINT("error", ("Found record with wrong data")); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } @@ -5554,7 +5569,7 @@ restart_bitmap_scan: (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) == 0) { DBUG_PRINT("error", ("Wrong page header")); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, 
HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } DBUG_PRINT("info", ("Page %lu has %u rows", @@ -5601,7 +5616,7 @@ restart_bitmap_scan: err: DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_PRINT("error", ("Wrong data on page")); - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } @@ -6523,7 +6538,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, DBUG_RETURN(result); crashed_file: - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); err: error= my_errno; if (lock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED) @@ -6611,7 +6626,7 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, if (delete_dir_entry(share, buff, rownr, &empty_space) < 0) { - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); goto err; } diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c index 6684a1df20b..59cc0ad6ba9 100644 --- a/storage/maria/ma_cache.c +++ b/storage/maria/ma_cache.c @@ -107,7 +107,7 @@ my_bool _ma_read_cache(MARIA_HA *handler, IO_CACHE *info, uchar *buff, if (!my_errno || my_errno == HA_ERR_FILE_TOO_SHORT) { if (!handler->in_check_table) - _ma_set_fatal_error(handler->s, HA_ERR_FILE_TOO_SHORT); + _ma_set_fatal_error(handler, HA_ERR_FILE_TOO_SHORT); if (!my_errno) my_errno= HA_ERR_WRONG_IN_RECORD; } diff --git a/storage/maria/ma_crypt.c b/storage/maria/ma_crypt.c index 9282405bae9..a69ebf3928f 100644 --- a/storage/maria/ma_crypt.c +++ b/storage/maria/ma_crypt.c @@ -470,9 +470,10 @@ static int ma_encrypt(MARIA_SHARE *share, MARIA_CRYPT_DATA *crypt_data, */ my_errno= HA_ERR_DECRYPTION_FAILED; my_printf_error(HA_ERR_DECRYPTION_FAILED, - "Unknown key id %u. Can't continue!", + "Unknown key id %u for %s. 
Can't continue!", MYF(ME_FATAL|ME_ERROR_LOG), - crypt_data->scheme.key_id); + crypt_data->scheme.key_id, + share->open_file_name.str); return 1; } diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index cbba9d975dc..b24cfcc6842 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -45,10 +45,10 @@ int maria_delete(MARIA_HA *info,const uchar *record) /* Test if record is in datafile */ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", - maria_print_error(share, HA_ERR_CRASHED); + _ma_print_error(info, HA_ERR_CRASHED, 0); DBUG_RETURN(my_errno= HA_ERR_CRASHED);); DBUG_EXECUTE_IF("my_error_test_undefined_error", - maria_print_error(share, INT_MAX); + _ma_print_error(info, INT_MAX, 0); DBUG_RETURN(my_errno= INT_MAX);); if (!(info->update & HA_STATE_AKTIV)) { @@ -139,7 +139,7 @@ err: info->update|=HA_STATE_WRITTEN; /* Buffer changed */ if (save_errno != HA_ERR_RECORD_CHANGED) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); save_errno= HA_ERR_CRASHED; } DBUG_RETURN(my_errno= save_errno); @@ -215,7 +215,7 @@ my_bool _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEY *key, if ((old_root=*root) == HA_OFFSET_ERROR) { - _ma_set_fatal_error(info->s, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(1); } @@ -354,7 +354,7 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, if (!(tmp_key_length=(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &kpos))) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); goto err; } root= _ma_row_pos_from_key(&tmp_key); @@ -415,7 +415,7 @@ static int d_search(MARIA_HA *info, MARIA_KEY *key, uint32 comp_flag, { /* This should newer happend */ DBUG_PRINT("error",("Didn't find key")); - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); goto err; } save_flag=0; diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c index 
7bd85ae5fd1..33f238d9754 100644 --- a/storage/maria/ma_dynrec.c +++ b/storage/maria/ma_dynrec.c @@ -415,7 +415,7 @@ static int _ma_find_writepos(MARIA_HA *info, BLOCK_DELETED)) { DBUG_PRINT("error",("Delete link crashed")); - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error_with_share(info->s, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(-1); } info->s->state.dellink=block_info.next_filepos; @@ -544,7 +544,7 @@ static my_bool update_backward_delete_link(MARIA_HA *info, } else { - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); /* Wrong delete link */ } } @@ -577,7 +577,7 @@ static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, (length=(uint) (block_info.filepos-filepos) +block_info.block_len) < MARIA_MIN_BLOCK_LENGTH) { - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); } /* Check if next block is a delete block */ @@ -863,7 +863,7 @@ static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, { DBUG_PRINT("error",("Got wrong block info")); if (!(error & BLOCK_FATAL_ERROR)) - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); goto err; } length=(ulong) (block_info.filepos-filepos) + block_info.block_len; @@ -1381,7 +1381,7 @@ size_t _ma_rec_unpack(register MARIA_HA *info, register uchar *to, uchar *from, DBUG_RETURN(found_length); err: - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_PRINT("error",("to_end: %p -> %p from_end: %p -> %p", to, to_end, from, from_end)); DBUG_DUMP("from", info->rec_buff, info->s->base.min_pack_length); @@ -1580,7 +1580,7 @@ err: DBUG_RETURN(my_errno); panic: - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); goto err; } @@ -1686,7 +1686,7 @@ my_bool 
_ma_cmp_dynamic_record(register MARIA_HA *info, } } else if (reclength < block_info.data_len) { - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); goto err; } reclength-= block_info.data_len; @@ -1925,7 +1925,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, if (my_errno == HA_ERR_FILE_TOO_SHORT) { /* Unexpected end of file */ - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); } goto err; } @@ -1954,7 +1954,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info, panic: /* Something is fatal wrong */ - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); err: fast_ma_writeinfo(info); DBUG_RETURN(my_errno); @@ -2103,7 +2103,7 @@ err: if (!handler->in_check_table) { /* We may be scanning the table for new rows; Don't give an error */ - _ma_set_fatal_error(handler->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(handler, HA_ERR_WRONG_IN_RECORD); } return BLOCK_ERROR; } diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c index 94e5e448b09..425cb421e22 100644 --- a/storage/maria/ma_extra.c +++ b/storage/maria/ma_extra.c @@ -174,7 +174,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, if ((error= flush_io_cache(&info->rec_cache))) { /* Fatal error found */ - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); } } break; @@ -441,7 +441,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function, { /* Fatal error found */ share->changed= 1; - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); } } mysql_mutex_unlock(&share->intern_lock); @@ -584,6 +584,7 @@ int maria_reset(MARIA_HA *info) info->page_changed= 1; info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND); + info->error_count= 0; DBUG_RETURN(error); } @@ -665,7 +666,7 @@ int 
_ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index, if (!error) DBUG_RETURN(0); - _ma_set_fatal_error(info->s, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(1); } diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c index f31113d8384..ddf92654be0 100644 --- a/storage/maria/ma_info.c +++ b/storage/maria/ma_info.c @@ -111,12 +111,13 @@ int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) /* - Write a message to the error log. + Write a message to the user or the error log. SYNOPSIS _ma_report_error() file_name Name of table file (e.g. index_file_name). errcode Error number. + flags Flags to my_error DESCRIPTION This function supplies my_error() with a table name. Most error @@ -129,12 +130,12 @@ int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) void */ -void _ma_report_error(int errcode, const LEX_STRING *name) +void _ma_report_error(int errcode, const LEX_STRING *name, myf flags) { size_t length; const char *file_name= name->str; DBUG_ENTER("_ma_report_error"); - DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name)); + DBUG_PRINT("enter",("error: %d table: '%s'", errcode, file_name)); if ((length= name->length) > 64) { @@ -147,7 +148,81 @@ void _ma_report_error(int errcode, const LEX_STRING *name) file_name+= length - 64; } } + my_printf_error(errcode, "Got error '%M' for '%s'", + flags, (int) errcode, file_name); + DBUG_VOID_RETURN; +} + + +/** + If standalone report all errors to the user + If run through the Aria handler, only report first error to the user + to not spam him - my_error(errcode, MYF(ME_ERROR_LOG), file_name); + @param info Aria Handler + @param error Error code + @param write_to_log If set to 1, print the error to the log.
This is only set + when a table was found to be crashed the first time +*/ + +void _ma_print_error(MARIA_HA *info, int error, my_bool write_to_log) +{ + DBUG_ENTER("_ma_print_error"); + DBUG_PRINT("error", ("error: %d log: %d", error, write_to_log)); + if (!info->error_count++ || !maria_in_ha_maria || write_to_log) + { + MARIA_SHARE *share= info->s; + _ma_report_error(error, + (share->index_file_name.length ? + &share->index_file_name : + &share->unique_file_name), + MYF(write_to_log ? ME_ERROR_LOG : 0)); + } DBUG_VOID_RETURN; } + + +/* + Handle a fatal error + + - Mark the table as crashed + - Print an error message, if we had not issued an error message before + that the table had been crashed. + - set my_errno to error + - If 'maria_assert_if_crashed_table is set, then assert. +*/ + +void _ma_set_fatal_error(MARIA_HA *info, int error) +{ + MARIA_SHARE *share= info->s; + _ma_print_error(info, error, + (share->state.changed & STATE_CRASHED_PRINTED) == 0); + maria_mark_crashed_share(share); + share->state.changed|= STATE_CRASHED_PRINTED; + my_errno= error; + DBUG_ASSERT(!maria_assert_if_crashed_table); +} + + +/* + Similar to the above, but only used from maria_open() where we don't have + an active handler object. Here we don't set a fatal error as we may + still want to do an automatic repair on the table +*/ + +void _ma_set_fatal_error_with_share(MARIA_SHARE *share, int error) +{ + DBUG_PRINT("error", ("error: %d", error)); + + if (!(share->state.changed & STATE_CRASHED_PRINTED)) + { + _ma_report_error(error, + (share->index_file_name.length ? 
+ &share->index_file_name : + &share->unique_file_name), + MYF(ME_WARNING | ME_ERROR_LOG)); + } + maria_mark_crashed_share(share); + share->state.changed|= STATE_CRASHED_PRINTED; + DBUG_ASSERT(!maria_assert_if_crashed_table); +} diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c index 51a042b381e..d47e8cf715a 100644 --- a/storage/maria/ma_key.c +++ b/storage/maria/ma_key.c @@ -644,7 +644,7 @@ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos) { /* Read only key */ if (_ma_put_key_in_record(info, (uint)info->lastinx, TRUE, buf)) { - _ma_set_fatal_error(info->s, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); return -1; } info->update|= HA_STATE_AKTIV; /* We should find a record */ @@ -687,7 +687,7 @@ check_result_t ma_check_index_cond(register MARIA_HA *info, uint keynr, if (_ma_put_key_in_record(info, keynr, FALSE, record)) { /* Impossible case; Can only happen if bug in code */ - maria_print_error(info->s, HA_ERR_CRASHED); + _ma_print_error(info, HA_ERR_CRASHED, 0); info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */ my_errno= HA_ERR_CRASHED; res= CHECK_ERROR; diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index 2f28ec8d175..acec592b922 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -771,6 +771,7 @@ uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn, my_errno != HA_ERR_DECRYPTION_FAILED) { result= 1; + _ma_set_fatal_error(info, my_errno); goto err; } buff= pagecache_block_link_to_buffer(page_link.link); @@ -861,6 +862,7 @@ uint _ma_apply_redo_index_free_page(MARIA_HA *info, &page_link.link))) { result= (uint) my_errno; + _ma_set_fatal_error(info, my_errno); goto err; } if (lsn_korr(buff) >= lsn) @@ -949,7 +951,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, #ifdef DBUG_ASSERT_EXISTS uint new_page_length= 0; #endif - int result; + int result, mark_crashed; MARIA_PAGE page; DBUG_ENTER("_ma_apply_redo_index"); DBUG_PRINT("enter", 
("page: %lu", (ulong) page_pos)); @@ -962,14 +964,15 @@ uint _ma_apply_redo_index(MARIA_HA *info, PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE, &page_link.link))) { - result= 1; + result= 1; mark_crashed= 0; + _ma_set_fatal_error(info, my_errno); goto err; } if (lsn_korr(buff) >= lsn) { /* Already applied */ check_skipped_lsn(info, lsn_korr(buff), 0, page_pos); - result= 0; + result= mark_crashed= 0; goto err; } @@ -1165,7 +1168,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, header+= TRANSID_SIZE; if (_ma_compact_keypage(&page, transid)) { - result= 1; + result= mark_crashed= 1; goto err; } page_length= page.size; @@ -1174,7 +1177,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, case KEY_OP_NONE: default: DBUG_ASSERT(0); - result= 1; + result= mark_crashed= 1; goto err; } } while (header < header_end); @@ -1203,7 +1206,7 @@ err: PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN, LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE); - if (result) + if (mark_crashed) _ma_mark_file_crashed(share); DBUG_RETURN(result); } diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c index ba6da707240..2ff8d019d1c 100644 --- a/storage/maria/ma_keycache.c +++ b/storage/maria/ma_keycache.c @@ -80,7 +80,7 @@ int maria_assign_to_pagecache(MARIA_HA *info, { error= my_errno; /* Mark that table must be checked */ - _ma_set_fatal_error(share, error); + _ma_set_fatal_error(info, error); } /* diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c index e53c3759f13..b14a8ddfdcd 100644 --- a/storage/maria/ma_locking.c +++ b/storage/maria/ma_locking.c @@ -82,7 +82,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) if (end_io_cache(&info->rec_cache)) { error= my_errno; - _ma_set_fatal_error(share, error); + _ma_set_fatal_error(info, error); } } if (!count) @@ -129,7 +129,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type) else share->not_flushed=1; if (error) - _ma_set_fatal_error(share, error); + _ma_set_fatal_error(info, error); } } info->opt_flag&= 
~(READ_CACHE_USED | WRITE_CACHE_USED); @@ -572,29 +572,6 @@ void _ma_mark_file_crashed(MARIA_SHARE *share) DBUG_VOID_RETURN; } -/* - Handle a fatal error - - - Mark the table as crashed - - Print an error message, if we had not issued an error message before - that the table had been crashed. - - set my_errno to error - - If 'maria_assert_if_crashed_table is set, then assert. -*/ - -void _ma_set_fatal_error(MARIA_SHARE *share, int error) -{ - DBUG_PRINT("error", ("error: %d", error)); - maria_mark_crashed_share(share); - if (!(share->state.changed & STATE_CRASHED_PRINTED)) - { - share->state.changed|= STATE_CRASHED_PRINTED; - maria_print_error(share, error); - } - my_errno= error; - DBUG_ASSERT(!maria_assert_if_crashed_table); -} - /** @brief Set uuid of for a Maria file diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 09f156722cf..d642f1a7194 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -44,7 +44,7 @@ static uchar *_ma_state_info_read(uchar *, MARIA_STATE_INFO *, myf); #define disk_pos_assert(share, pos, end_pos) \ if (pos > end_pos) \ { \ - _ma_set_fatal_error(share, HA_ERR_CRASHED); \ + _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED); \ goto err; \ } @@ -232,7 +232,8 @@ err: if ((save_errno == HA_ERR_CRASHED) || (save_errno == HA_ERR_CRASHED_ON_USAGE) || (save_errno == HA_ERR_CRASHED_ON_REPAIR)) - _ma_report_error(save_errno, &share->open_file_name); + _ma_report_error(save_errno, &share->open_file_name, + MYF(ME_ERROR_LOG)); switch (errpos) { case 6: (*share->end)(&info); @@ -475,7 +476,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags, { if (mysql_file_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP))) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED); goto err; } } @@ -583,7 +584,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags, /* sanity check */ if (share->base.keystart > 65535 || share->base.rec_reflength > 
8) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED); goto err; } @@ -784,7 +785,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags, pos[0].language= pos[-1].language; if (!(pos[0].charset= pos[-1].charset)) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED); goto err; } pos++; @@ -1192,7 +1193,7 @@ err: LEX_STRING tmp_name; tmp_name.str= (char*) name; tmp_name.length= strlen(name); - _ma_report_error(save_errno, &tmp_name); + _ma_report_error(save_errno, &tmp_name, MYF(ME_ERROR_LOG)); } switch (errpos) { case 7: @@ -2126,7 +2127,7 @@ int maria_enable_indexes(MARIA_HA *info) DBUG_PRINT("error", ("data_file_length: %lu key_file_length: %lu", (ulong) share->state.state.data_file_length, (ulong) share->state.state.key_file_length)); - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); error= HA_ERR_CRASHED; } else diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c index d7f86a9a7ae..19783423ab5 100644 --- a/storage/maria/ma_packrec.c +++ b/storage/maria/ma_packrec.c @@ -194,7 +194,7 @@ static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, /* Only the first three bytes of magic number are independent of version. 
*/ if (memcmp(header, maria_pack_file_magic, 3)) { - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error_with_share(share, HA_ERR_WRONG_IN_RECORD); goto err0; } share->pack.version= header[3]; /* fourth uchar of magic number */ @@ -331,7 +331,7 @@ static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, DBUG_RETURN(0); err3: - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error_with_share(share, HA_ERR_WRONG_IN_RECORD); err2: my_free(share->decode_tables); err1: @@ -762,7 +762,7 @@ int _ma_read_pack_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos) DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf, info->rec_buff, block_info.rec_len)); panic: - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); err: DBUG_RETURN(my_errno); } @@ -797,7 +797,7 @@ int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, bit_buff->pos - bit_buff->bits / 8 == bit_buff->end) DBUG_RETURN(0); info->update&= ~HA_STATE_AKTIV; - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } /* _ma_pack_rec_unpack */ @@ -1375,7 +1375,7 @@ int _ma_read_rnd_pack_record(MARIA_HA *info, #ifndef DBUG_OFF if (block_info.rec_len > share->max_pack_length) { - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); goto err; } #endif @@ -1655,7 +1655,7 @@ static int _ma_read_rnd_mempack_record(MARIA_HA *info, #ifndef DBUG_OFF if (block_info.rec_len > info->s->max_pack_length) { - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); goto err; } #endif diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c index 13f5b7d698b..25db0e8acec 100644 --- a/storage/maria/ma_page.c +++ b/storage/maria/ma_page.c @@ -128,7 +128,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA 
*info, { DBUG_PRINT("error",("Got errno: %d from pagecache_read",my_errno)); info->last_keypage=HA_OFFSET_ERROR; - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, my_errno); DBUG_RETURN(1); } info->last_keypage= pos; @@ -160,7 +160,7 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info, _ma_get_keynr(share, tmp))); DBUG_DUMP("page", tmp, page_size); info->last_keypage = HA_OFFSET_ERROR; - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(1); } } @@ -433,7 +433,10 @@ my_off_t _ma_new(register MARIA_HA *info, int level, (pgcache_page_no_t) (pos / block_size), level, 0, share->page_type, PAGECACHE_LOCK_WRITE, &(*page_link)->link))) + { pos= HA_OFFSET_ERROR; + _ma_set_fatal_error(info, my_errno); + } else { /* @@ -566,7 +569,7 @@ my_bool _ma_compact_keypage(MARIA_PAGE *ma_page, TrID min_read_from) { DBUG_PRINT("error",("Couldn't find last key: page_pos: %p", page)); - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(1); } if (key_has_transid(page-1)) diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c index b48b0fc0c6b..8cd82e1c6fc 100644 --- a/storage/maria/ma_rkey.c +++ b/storage/maria/ma_rkey.c @@ -94,7 +94,7 @@ int maria_rkey(MARIA_HA *info, uchar *buf, int inx, const uchar *key_data, case HA_KEY_ALG_RTREE: if (maria_rtree_find_first(info, &key, nextflag) < 0) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); info->cur_row.lastpos= HA_OFFSET_ERROR; } break; diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c index a57db7d2a2d..63c11632969 100644 --- a/storage/maria/ma_search.c +++ b/storage/maria/ma_search.c @@ -50,6 +50,12 @@ int _ma_check_index(MARIA_HA *info, int inx) my_errno= HA_ERR_INTERNAL_ERROR; /* Impossible */ return(-1); } + if (unlikely(maria_is_crashed(info))) + { + my_errno= HA_ERR_CRASHED; + return(-1); + } + return(inx); } /* _ma_check_index */ @@ 
-155,7 +161,7 @@ static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key, &last_key_not_used); if (flag == MARIA_FOUND_WRONG_KEY) { - maria_print_error(info->s, HA_ERR_CRASHED); + _ma_print_error(info, HA_ERR_CRASHED, 0); my_errno= HA_ERR_CRASHED; goto err; } @@ -389,7 +395,7 @@ int _ma_seq_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page, length=(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &page); if (length == 0 || page > end) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED); DBUG_PRINT("error", ("Found wrong key: length: %u page: %p end: %p", length, page, end)); @@ -564,7 +570,7 @@ int _ma_prefix_search(const MARIA_KEY *key, const MARIA_PAGE *ma_page, if (page > end) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED); DBUG_PRINT("error", ("Found wrong key: length: %u page: %p end: %p", length, page, end)); @@ -1046,7 +1052,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag, { if (length > (uint) keyseg->length) { - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); return 0; /* Error */ } if (length == 0) /* Same key */ @@ -1061,7 +1067,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag, ("Found too long null packed key: %u of %u at %p", length, keyseg->length, *page_pos)); DBUG_DUMP("key", *page_pos, 16); - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); return 0; } continue; @@ -1118,7 +1124,7 @@ uint _ma_get_pack_key(MARIA_KEY *int_key, uint page_flag, DBUG_PRINT("error",("Found too long packed key: %u of %u at %p", length, keyseg->length, *page_pos)); DBUG_DUMP("key", *page_pos, 16); - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); return 0; /* Error */ } store_key_length_inc(key,length); @@ 
-1277,7 +1283,7 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag, ("Found too long binary packed key: %u of %u at %p", length, keyinfo->maxlength, *page_pos)); DBUG_DUMP("key", *page_pos, 16); - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); /* Wrong key */ } /* Key is packed against prev key, take prefix from prev key. */ @@ -1368,7 +1374,7 @@ uint _ma_get_binary_pack_key(MARIA_KEY *int_key, uint page_flag, uint nod_flag, if (from_end != page_end) { DBUG_PRINT("error",("Error when unpacking key")); - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); /* Error */ } } @@ -1458,7 +1464,7 @@ uchar *_ma_get_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *keypos) { if (!(*keyinfo->get_key)(key, page_flag, nod_flag, &page)) { - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); } } @@ -1508,7 +1514,7 @@ static my_bool _ma_get_prev_key(MARIA_KEY *key, MARIA_PAGE *ma_page, { if (! 
(*keyinfo->get_key)(key, page_flag, nod_flag, &page)) { - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(1); } } @@ -1561,7 +1567,7 @@ uchar *_ma_get_last_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *endpos) { DBUG_PRINT("error",("Couldn't find last key: page: %p", page)); - _ma_set_fatal_error(keyinfo->share, HA_ERR_CRASHED); + _ma_set_fatal_error_with_share(keyinfo->share, HA_ERR_CRASHED); DBUG_RETURN(0); } } diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c index 98fa235a2cc..d8a8b0a05d7 100644 --- a/storage/maria/ma_statrec.c +++ b/storage/maria/ma_statrec.c @@ -294,6 +294,6 @@ int _ma_read_rnd_static_record(MARIA_HA *info, uchar *buf, } /* my_errno should be set if rec_cache.error == -1 */ if (info->rec_cache.error != -1 || my_errno == 0) - _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(my_errno); /* Something wrong (EOF?) 
*/ } diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index cb1a1e48ad2..60ab4452384 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -35,7 +35,7 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, DBUG_ENTER("maria_update"); DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", - maria_print_error(info->s, HA_ERR_CRASHED); + _ma_print_error(info, HA_ERR_CRASHED, 0); DBUG_RETURN(my_errno= HA_ERR_CRASHED);); if (!(info->update & HA_STATE_AKTIV)) { @@ -217,7 +217,7 @@ err: if ((flag++ && _ma_ft_del(info,i,new_key_buff,newrec,pos)) || _ma_ft_add(info,i,old_key_buff,oldrec,pos)) { - _ma_set_fatal_error(share, my_errno); + _ma_set_fatal_error(info, my_errno); break; } } @@ -232,7 +232,7 @@ err: if ((flag++ && _ma_ck_delete(info, &new_key)) || _ma_ck_write(info, &old_key)) { - _ma_set_fatal_error(share, my_errno); + _ma_set_fatal_error(info, my_errno); break; } } @@ -240,7 +240,7 @@ err: } while (i-- != 0); } else - _ma_set_fatal_error(share, save_errno); + _ma_set_fatal_error(info, save_errno); info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED | key_changed); @@ -248,6 +248,6 @@ err: err_end: _ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE); if (save_errno == HA_ERR_KEY_NOT_FOUND) - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(my_errno=save_errno); } /* maria_update */ diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 392f0bf6d0e..1dbee5d744a 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -98,7 +98,7 @@ int maria_write(MARIA_HA *info, const uchar *record) share->kfile.file, info->dfile.file)); DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", - maria_print_error(info->s, HA_ERR_CRASHED); + _ma_print_error(info, HA_ERR_CRASHED, 0); DBUG_RETURN(my_errno= HA_ERR_CRASHED);); if (share->options & HA_OPTION_READ_ONLY_DATA) { @@ -403,10 +403,7 @@ err: } if (fatal_error) - { - 
maria_print_error(info->s, HA_ERR_CRASHED); - maria_mark_crashed(info); - } + _ma_set_fatal_error(info, HA_ERR_CRASHED); info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); my_errno=save_errno; @@ -835,7 +832,7 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key, { if (t_length >= keyinfo->maxlength*2+MARIA_INDEX_OVERHEAD_SIZE) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(-1); } bmove_upp(endpos+t_length, endpos, (uint) (endpos-key_pos)); @@ -844,7 +841,7 @@ int _ma_insert(register MARIA_HA *info, MARIA_KEY *key, { if (-t_length >= keyinfo->maxlength*2+MARIA_INDEX_OVERHEAD_SIZE) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(-1); } bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length); @@ -1206,7 +1203,7 @@ static uchar *_ma_find_last_pos(MARIA_KEY *int_key, MARIA_PAGE *ma_page, if (!(length=(*keyinfo->get_key)(&tmp_key, page_flag, 0, &page))) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(0); } @@ -1219,7 +1216,7 @@ static uchar *_ma_find_last_pos(MARIA_KEY *int_key, MARIA_PAGE *ma_page, memcpy(int_key->data, key_buff, length); /* previous key */ if (!(length=(*keyinfo->get_key)(&tmp_key, page_flag, 0, &page))) { - _ma_set_fatal_error(share, HA_ERR_CRASHED); + _ma_set_fatal_error(info, HA_ERR_CRASHED); DBUG_RETURN(0); } } while (page < end); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 722ab150160..c7aef97072b 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -976,6 +976,7 @@ struct st_maria_handler uint opt_flag; /* Optim. 
for space/speed */ uint open_flags; /* Flags used in open() */ uint update; /* If file changed since open */ + uint error_count; /* Incremented for each error given */ int lastinx; /* Last used index */ uint last_rkey_length; /* Last length in maria_rkey() */ uint *last_rtree_keypos; /* Last key positions for rtrees */ @@ -1141,20 +1142,6 @@ struct ha_table_option_struct #define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) #define maria_in_repair(x) ((x)->s->state.changed & STATE_IN_REPAIR) -#ifdef EXTRA_DEBUG -/** - Brings additional information in certain debug builds and in standalone - (non-ha_maria) programs. To help debugging. Not in ha_maria, to not spam the - user (some messages can be produced many times per statement, or even - wrongly during some repair operations). -*/ -#define maria_print_error(SHARE, ERRNO) \ - do{ if (!maria_in_ha_maria) \ - _ma_report_error((ERRNO), &(SHARE)->index_file_name); } \ - while(0) -#else -#define maria_print_error(SHARE, ERRNO) while (0) -#endif #define DBUG_DUMP_KEY(name, key) DBUG_DUMP(name, (key)->data, (key)->data_length + (key)->ref_length) /* Functions to store length of space packed keys, VARCHAR or BLOB keys */ @@ -1413,7 +1400,8 @@ extern int _ma_test_if_changed(MARIA_HA *info); extern int _ma_mark_file_changed(MARIA_SHARE *info); extern int _ma_mark_file_changed_now(MARIA_SHARE *info); extern void _ma_mark_file_crashed(MARIA_SHARE *share); -void _ma_set_fatal_error(MARIA_SHARE *share, int error); +extern void _ma_set_fatal_error(MARIA_HA *share, int error); +extern void _ma_set_fatal_error_with_share(MARIA_SHARE *share, int error); extern my_bool _ma_set_uuid(MARIA_SHARE *info, my_bool reset_uuid); extern my_bool _ma_check_if_zero(uchar *pos, size_t size); extern int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_table); @@ -1600,7 +1588,9 @@ extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, size_t *rec_buff_size, File file, my_off_t 
filepos); extern void _ma_store_blob_length(uchar *pos, uint pack_length, uint length); -extern void _ma_report_error(int errcode, const LEX_STRING *file_name); +extern void _ma_report_error(int errcode, const LEX_STRING *file_name, + myf flags); +extern void _ma_print_error(MARIA_HA *info, int error, my_bool write_to_log); extern my_bool _ma_memmap_file(MARIA_HA *info); extern void _ma_unmap_file(MARIA_HA *info); extern uint _ma_save_pack_length(uint version, uchar * block_buff, -- cgit v1.2.1 From 960f0344a21af4bd8d422b9bfb9a617947d7541d Mon Sep 17 00:00:00 2001 From: chansuke Date: Tue, 7 Jun 2022 22:33:37 +0900 Subject: MDEV-25273: fix typo (s/strucures/structures/) --- sql/share/errmsg-utf8.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index 300ec1eaa46..b05fa5e5412 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7637,7 +7637,7 @@ ER_JSON_SYNTAX ER_JSON_ESCAPING eng "Incorrect escaping in JSON text in argument %d to function '%s' at position %d" ER_JSON_DEPTH - eng "Limit of %d on JSON nested strucures depth is reached in argument %d to function '%s' at position %d" + eng "Limit of %d on JSON nested structures depth is reached in argument %d to function '%s' at position %d" ER_JSON_PATH_EOS eng "Unexpected end of JSON path in argument %d to function '%s'" ER_JSON_PATH_SYNTAX -- cgit v1.2.1 From c9498f33dea998de7a128dcd86368f064513cea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 8 Jun 2022 09:20:48 +0300 Subject: MDEV-18519: Assertion failure in btr_page_reorganize_low() Even after commit 0b47c126e31cddda1e94588799599e138400bcf8 there are a few ib::fatal() calls in non-debug code that can be replaced easily. btr_page_reorganize_low(): On size invariant violation, return an error code instead of crashing. btr_check_blob_fil_page_type(): On an invalid page type, report an error but do not crash. 
btr_copy_blob_prefix(): Truncate the output if a page type is invalid. dict_load_foreign_cols(): On an error, return DB_CORRUPTION instead of crashing. fil_space_decrypt_full_crc32(), fil_space_decrypt_for_non_full_checksum(): On error, return DB_DECRYPTION_FAILED instead of crashing. fil_set_max_space_id_if_bigger(): Replace ib::fatal() with an equivalent ut_a() assertion. --- storage/innobase/btr/btr0btr.cc | 10 ++++++---- storage/innobase/btr/btr0cur.cc | 35 +++++++++++++++++------------------ storage/innobase/dict/dict0load.cc | 11 ++++++----- storage/innobase/fil/fil0crypt.cc | 23 ++--------------------- storage/innobase/fil/fil0fil.cc | 4 +--- 5 files changed, 32 insertions(+), 51 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index c3fa33717f7..72375fc6c43 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -1305,10 +1305,12 @@ static dberr_t btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, page_get_max_insert_size_after_reorganize(block->page.frame, 1); if (UNIV_UNLIKELY(data_size1 != data_size2 || max1 != max2)) - ib::fatal() << "Page old data size " << data_size1 - << " new data size " << data_size2 - << ", page old max ins size " << max1 - << " new max ins size " << max2; + { + sql_print_error("InnoDB: Page old data size %u new data size %u" + ", page old max ins size %zu new max ins size %zu", + data_size1, data_size2, max1, max2); + return DB_CORRUPTION; + } /* Restore the cursor position. */ if (pos) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index adce2ed2b6f..3499e9d84fe 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -71,6 +71,7 @@ Created 10/16/1994 Heikki Tuuri #ifdef WITH_WSREP #include "mysql/service_wsrep.h" #endif /* WITH_WSREP */ +#include "log.h" /** Buffered B-tree operation types, introduced as part of delete buffering. 
*/ enum btr_op_t { @@ -7196,29 +7197,29 @@ func_exit: } /** Check the FIL_PAGE_TYPE on an uncompressed BLOB page. -@param[in] block uncompressed BLOB page -@param[in] read true=read, false=purge */ -static void btr_check_blob_fil_page_type(const buf_block_t& block, bool read) +@param block uncompressed BLOB page +@param op operation +@return whether the type is invalid */ +static bool btr_check_blob_fil_page_type(const buf_block_t& block, + const char *op) { uint16_t type= fil_page_get_type(block.page.frame); - if (UNIV_LIKELY(type == FIL_PAGE_TYPE_BLOB)) - return; - /* FIXME: take the tablespace as a parameter */ - if (fil_space_t *space= fil_space_t::get(block.page.id().space())) + if (UNIV_LIKELY(type == FIL_PAGE_TYPE_BLOB)); + else if (fil_space_t *space= fil_space_t::get(block.page.id().space())) { /* Old versions of InnoDB did not initialize FIL_PAGE_TYPE on BLOB pages. Do not print anything about the type mismatch when reading a BLOB page that may be from old versions. */ - if (space->full_crc32() || DICT_TF_HAS_ATOMIC_BLOBS(space->flags)) - { - ib::fatal() << "FIL_PAGE_TYPE=" << type - << (read ? 
" on BLOB read file " : " on BLOB purge file ") - << space->chain.start->name - << " page " << block.page.id().page_no(); - } + bool fail= space->full_crc32() || DICT_TF_HAS_ATOMIC_BLOBS(space->flags); + if (fail) + sql_print_error("InnoDB: FIL_PAGE_TYPE=%u on BLOB %s file %s page %u", + type, op, space->chain.start->name, + block.page.id().page_no()); space->release(); + return fail; } + return false; } /*******************************************************************//** @@ -7365,7 +7366,7 @@ skip_free: } } else { ut_ad(!block->page.zip.data); - btr_check_blob_fil_page_type(*ext_block, false); + btr_check_blob_fil_page_type(*ext_block, "purge"); const uint32_t next_page_no = mach_read_from_4( page + FIL_PAGE_DATA @@ -7499,14 +7500,12 @@ btr_copy_blob_prefix( mtr_start(&mtr); block = buf_page_get(id, 0, RW_S_LATCH, &mtr); - if (!block) { + if (!block || btr_check_blob_fil_page_type(*block, "read")) { mtr.commit(); return copied_len; } page = buf_block_get_frame(block); - btr_check_blob_fil_page_type(*block, true); - blob_header = page + offset; part_len = btr_blob_get_part_len(blob_header); copy_len = ut_min(part_len, len - copied_len); diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index eb886a5554e..76d1f675ff6 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -2657,7 +2657,6 @@ static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) goto func_exit; } for (ulint i = 0; i < foreign->n_fields; i++) { -retry: ut_a(btr_pcur_is_on_user_rec(&pcur)); const rec_t* rec = btr_pcur_get_rec(&pcur); @@ -2690,9 +2689,7 @@ retry: if (rec_get_deleted_flag(rec, 0)) { ut_ad(id); -next: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - goto retry; + goto next; } field = rec_get_nth_field_old( @@ -2718,7 +2715,7 @@ next: rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &ref_col_name_len); - ib::fatal sout; + ib::error sout; sout << "Unable to load column names for foreign" " key 
'" << foreign->id @@ -2733,6 +2730,9 @@ next: sout << "', REF_COL_NAME='"; sout.write(ref_col_name, ref_col_name_len); sout << "')"; + + err = DB_CORRUPTION; + break; } field = rec_get_nth_field_old( @@ -2750,6 +2750,7 @@ next: foreign->referenced_col_names[i] = mem_heap_strdupl( foreign->heap, (char*) field, len); +next: btr_pcur_move_to_next_user_rec(&pcur, &mtr); } func_exit: diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 6c8498f96a3..516629fd98a 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -665,15 +665,7 @@ static dberr_t fil_space_decrypt_full_crc32( (uint) space, offset, lsn); if (rc != MY_AES_OK || dstlen != srclen) { - if (rc == -1) { - return DB_DECRYPTION_FAILED; - } - - ib::fatal() << "Unable to decrypt data-block " - << " src: " << src << "srclen: " - << srclen << " buf: " << dst << "buflen: " - << dstlen << " return-code: " << rc - << " Can't continue!"; + return DB_DECRYPTION_FAILED; } /* Copy only checksum part in the trailer */ @@ -735,18 +727,7 @@ static dberr_t fil_space_decrypt_for_non_full_checksum( space, offset, lsn); if (! 
((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) { - - if (rc == -1) { - return DB_DECRYPTION_FAILED; - } - - ib::fatal() << "Unable to decrypt data-block " - << " src: " << static_cast<const void*>(src) - << "srclen: " - << srclen << " buf: " - << static_cast<const void*>(dst) << "buflen: " - << dstlen << " return-code: " << rc - << " Can't continue!"; + return DB_DECRYPTION_FAILED; } /* For compressed tables we do not store the FIL header because diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 07f77add744..de94ec5f0c1 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1393,9 +1393,7 @@ fil_set_max_space_id_if_bigger( /*===========================*/ ulint max_id) /*!< in: maximum known id */ { - if (max_id >= SRV_SPACE_ID_UPPER_BOUND) { - ib::fatal() << "Max tablespace id is too high, " << max_id; - } + ut_a(max_id < SRV_SPACE_ID_UPPER_BOUND); mysql_mutex_lock(&fil_system.mutex); -- cgit v1.2.1 From 892c426371b4be558d32fdeba7d1d56f46b40f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 8 Jun 2022 09:48:12 +0300 Subject: MDEV-13542: Do not crash on decryption failure fil_page_type_validate(): Remove. This debug check was mostly redundant and added little value to the code paths that deal with page_compressed or encrypted pages. fil_get_page_type_name(): Remove; unused function. fil_space_decrypt(): Return an error if the page is not supposed to be encrypted. It is possible that an unencrypted page contains a nonzero key_version field even though it is not supposed to be encrypted. Previously we would crash in such a situation. buf_page_decrypt_after_read(): Simplify the code. Remove some unnecessary error message about temporary tablespace corruption. This is where we would usually invoke fil_space_decrypt().
--- storage/innobase/CMakeLists.txt | 1 - storage/innobase/buf/buf0buf.cc | 25 ++---- storage/innobase/buf/buf0flu.cc | 3 - storage/innobase/fil/fil0crypt.cc | 24 +++--- storage/innobase/include/fil0crypt.h | 6 +- storage/innobase/include/fil0fil.h | 6 +- storage/innobase/include/fil0fil.inl | 145 ----------------------------------- 7 files changed, 22 insertions(+), 188 deletions(-) delete mode 100644 storage/innobase/include/fil0fil.inl diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index cc31b3c5dcc..bb10160370a 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -128,7 +128,6 @@ SET(INNOBASE_SOURCES include/fil0crypt.h include/fil0crypt.inl include/fil0fil.h - include/fil0fil.inl include/fil0pagecompress.h include/fsp0file.h include/fsp0fsp.h diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 6eb1b6d463a..e6a6bd8cbb7 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -403,28 +403,22 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage, return (true); } - if (node.space->purpose == FIL_TYPE_TEMPORARY + buf_tmp_buffer_t* slot; + + if (id.space() == SRV_TMP_SPACE_ID && innodb_encrypt_temporary_tables) { - buf_tmp_buffer_t* slot = buf_pool.io_buf_reserve(); + slot = buf_pool.io_buf_reserve(); ut_a(slot); slot->allocate(); - - if (!buf_tmp_page_decrypt(slot->crypt_buf, dst_frame)) { - slot->release(); - ib::error() << "Encrypted page " << id - << " in file " << node.name; - return false; - } - + bool ok = buf_tmp_page_decrypt(slot->crypt_buf, dst_frame); slot->release(); - return true; + return ok; } /* Page is encrypted if encryption information is found from tablespace and page contains used key_version. This is true also for pages first compressed and then encrypted. 
*/ - buf_tmp_buffer_t* slot; uint key_version = buf_page_get_key_version(dst_frame, flags); if (page_compressed && !key_version) { @@ -441,13 +435,9 @@ decompress: slot->allocate(); decompress_with_slot: - ut_d(fil_page_type_validate(node.space, dst_frame)); - ulint write_size = fil_page_decompress( slot->crypt_buf, dst_frame, flags); slot->release(); - ut_ad(!write_size - || fil_page_type_validate(node.space, dst_frame)); ut_ad(node.space->referenced()); return write_size != 0; } @@ -467,7 +457,6 @@ decrypt_failed: slot = buf_pool.io_buf_reserve(); ut_a(slot); slot->allocate(); - ut_d(fil_page_type_validate(node.space, dst_frame)); /* decrypt using crypt_buf to dst_frame */ if (!fil_space_decrypt(node.space, slot->crypt_buf, dst_frame)) { @@ -475,8 +464,6 @@ decrypt_failed: goto decrypt_failed; } - ut_d(fil_page_type_validate(node.space, dst_frame)); - if ((fil_space_t::full_crc32(flags) && page_compressed) || fil_page_get_type(dst_frame) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index cc2f72c9a62..17cca04b3a2 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -625,7 +625,6 @@ static byte *buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s, ut_ad(space->id == bpage->id().space()); ut_ad(!*slot); - ut_d(fil_page_type_validate(space, s)); const uint32_t page_no= bpage->id().page_no(); switch (page_no) { @@ -722,7 +721,6 @@ not_compressed: /* Workaround for MDEV-15527. 
*/ memset(tmp + len, 0 , srv_page_size - len); - ut_d(fil_page_type_validate(space, tmp)); if (encrypted) tmp= fil_space_encrypt(space, page_no, tmp, d); @@ -737,7 +735,6 @@ not_compressed: d= tmp; } - ut_d(fil_page_type_validate(space, d)); (*slot)->out_buf= d; return d; } diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 516629fd98a..0bd5467cbfa 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -640,10 +640,7 @@ static dberr_t fil_space_decrypt_full_crc32( lsn_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN); uint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET); - ut_a(key_version != ENCRYPTION_KEY_NOT_ENCRYPTED); - - ut_ad(crypt_data); - ut_ad(crypt_data->is_encrypted()); + ut_ad(key_version != ENCRYPTION_KEY_NOT_ENCRYPTED); memcpy(tmp_frame, src_frame, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); @@ -699,8 +696,7 @@ static dberr_t fil_space_decrypt_for_non_full_checksum( src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN); - ut_a(key_version != ENCRYPTION_KEY_NOT_ENCRYPTED); - ut_a(crypt_data != NULL && crypt_data->is_encrypted()); + ut_ad(key_version != ENCRYPTION_KEY_NOT_ENCRYPTED); /* read space & lsn */ uint header_len = FIL_PAGE_DATA; @@ -753,8 +749,8 @@ static dberr_t fil_space_decrypt_for_non_full_checksum( @param[in] physical_size page size @param[in] fsp_flags Tablespace flags @param[in,out] src_frame Page to decrypt -@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED -@return DB_SUCCESS or error */ +@retval DB_SUCCESS on success +@retval DB_DECRYPTION_FAILED on error */ dberr_t fil_space_decrypt( ulint space_id, @@ -764,6 +760,10 @@ fil_space_decrypt( ulint fsp_flags, byte* src_frame) { + if (!crypt_data || !crypt_data->is_encrypted()) { + return DB_DECRYPTION_FAILED; + } + if (fil_space_t::full_crc32(fsp_flags)) { return fil_space_decrypt_full_crc32( space_id, crypt_data, tmp_frame, src_frame); @@ -780,7 +780,8 
@@ Decrypt a page. @param[in] tmp_frame Temporary buffer used for decrypting @param[in,out] src_frame Page to decrypt @return decrypted page, or original not encrypted page if decryption is -not needed.*/ +not needed. +@retval nullptr on failure */ byte* fil_space_decrypt( const fil_space_t* space, @@ -789,7 +790,6 @@ fil_space_decrypt( { const ulint physical_size = space->physical_size(); - ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted()); ut_ad(space->referenced()); if (DB_SUCCESS != fil_space_decrypt(space->id, space->crypt_data, @@ -800,9 +800,7 @@ fil_space_decrypt( /* Copy the decrypted page back to page buffer, not really any other options. */ - memcpy(src_frame, tmp_frame, physical_size); - - return src_frame; + return static_cast(memcpy(src_frame, tmp_frame, physical_size)); } /***********************************************************************/ diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h index c3abdad90ed..26272761f43 100644 --- a/storage/innobase/include/fil0crypt.h +++ b/storage/innobase/include/fil0crypt.h @@ -296,7 +296,8 @@ byte* fil_space_encrypt( @param[in] physical_size page size @param[in] fsp_flags Tablespace flags @param[in,out] src_frame Page to decrypt -@return DB_SUCCESS or error */ +@retval DB_SUCCESS on success +@retval DB_DECRYPTION_FAILED on error */ dberr_t fil_space_decrypt( ulint space_id, @@ -312,7 +313,8 @@ Decrypt a page @param[in] tmp_frame Temporary buffer used for decrypting @param[in,out] src_frame Page to decrypt @return decrypted page, or original not encrypted page if decryption is -not needed.*/ +not needed. 
+@retval nullptr on failure */ byte* fil_space_decrypt( const fil_space_t* space, diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 8889604a919..fadaf36f83b 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -24,8 +24,7 @@ The low-level file system Created 10/25/1995 Heikki Tuuri *******************************************************/ -#ifndef fil0fil_h -#define fil0fil_h +#pragma once #include "fsp0types.h" #include "mach0data.h" @@ -1902,7 +1901,4 @@ void test_make_filepath(); @return block size */ ulint fil_space_get_block_size(const fil_space_t* space, unsigned offset); -#include "fil0fil.inl" #endif /* UNIV_INNOCHECKSUM */ - -#endif /* fil0fil_h */ diff --git a/storage/innobase/include/fil0fil.inl b/storage/innobase/include/fil0fil.inl deleted file mode 100644 index 3194e54c5b5..00000000000 --- a/storage/innobase/include/fil0fil.inl +++ /dev/null @@ -1,145 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2015, 2019, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/fil0fil.ic -The low-level file system support functions - -Created 31/03/2015 Jan Lindström -*******************************************************/ - -#ifndef fil0fil_ic -#define fil0fil_ic - -/*******************************************************************//** -Return page type name */ -UNIV_INLINE -const char* -fil_get_page_type_name( -/*===================*/ - ulint page_type) /*!< in: FIL_PAGE_TYPE */ -{ - switch(page_type) { - case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED: - return "PAGE_COMPRESSED_ENRYPTED"; - case FIL_PAGE_PAGE_COMPRESSED: - return "PAGE_COMPRESSED"; - case FIL_PAGE_TYPE_INSTANT: - case FIL_PAGE_INDEX: - return "INDEX"; - case FIL_PAGE_RTREE: - return "RTREE"; - case FIL_PAGE_UNDO_LOG: - return "UNDO LOG"; - case FIL_PAGE_INODE: - return "INODE"; - case FIL_PAGE_IBUF_FREE_LIST: - return "IBUF_FREE_LIST"; - case FIL_PAGE_TYPE_ALLOCATED: - return "ALLOCATED"; - case FIL_PAGE_IBUF_BITMAP: - return "IBUF_BITMAP"; - case FIL_PAGE_TYPE_SYS: - return "SYS"; - case FIL_PAGE_TYPE_TRX_SYS: - return "TRX_SYS"; - case FIL_PAGE_TYPE_FSP_HDR: - return "FSP_HDR"; - case FIL_PAGE_TYPE_XDES: - return "XDES"; - case FIL_PAGE_TYPE_BLOB: - return "BLOB"; - case FIL_PAGE_TYPE_ZBLOB: - return "ZBLOB"; - case FIL_PAGE_TYPE_ZBLOB2: - return "ZBLOB2"; - case FIL_PAGE_TYPE_UNKNOWN: - return "OLD UNKNOWN PAGE TYPE"; - default: - return "PAGE TYPE CORRUPTED"; - } -} - -#ifdef UNIV_DEBUG -/** Validate page type. 
-@param[in] space Tablespace object -@param[in] page page to validate -@return true if valid, false if not */ -UNIV_INLINE -bool -fil_page_type_validate( - fil_space_t* space, - const byte* page) -{ - const uint16_t page_type = fil_page_get_type(page); - - if ((page_type & 1U << FIL_PAGE_COMPRESS_FCRC32_MARKER) - && space->full_crc32() - && space->is_compressed()) { - return true; - } - - /* Validate page type */ - if (!((page_type == FIL_PAGE_PAGE_COMPRESSED || - page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED || - page_type == FIL_PAGE_INDEX || - page_type == FIL_PAGE_TYPE_INSTANT || - page_type == FIL_PAGE_RTREE || - page_type == FIL_PAGE_UNDO_LOG || - page_type == FIL_PAGE_INODE || - page_type == FIL_PAGE_IBUF_FREE_LIST || - page_type == FIL_PAGE_TYPE_ALLOCATED || - page_type == FIL_PAGE_IBUF_BITMAP || - page_type == FIL_PAGE_TYPE_SYS || - page_type == FIL_PAGE_TYPE_TRX_SYS || - page_type == FIL_PAGE_TYPE_FSP_HDR || - page_type == FIL_PAGE_TYPE_XDES || - page_type == FIL_PAGE_TYPE_BLOB || - page_type == FIL_PAGE_TYPE_ZBLOB || - page_type == FIL_PAGE_TYPE_ZBLOB2 || - page_type == FIL_PAGE_TYPE_UNKNOWN))) { - - ulint space_id = mach_read_from_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - ulint offset = mach_read_from_4(page + FIL_PAGE_OFFSET); - - ulint key_version = mach_read_from_4( - page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - if (space && space->full_crc32()) { - key_version = mach_read_from_4( - page + FIL_PAGE_FCRC32_KEY_VERSION); - } - - /* Dump out the page info */ - ib::fatal() << "Page " << space_id << ":" << offset - << " name " << (space && space->chain.start - ? 
space->chain.start->name : "???") - << " page_type " << page_type - << " key_version " << key_version - << " lsn " << mach_read_from_8(page + FIL_PAGE_LSN) - << " compressed_len " << mach_read_from_2(page + FIL_PAGE_DATA); - return false; - } - - return true; -} -#endif /* UNIV_DEBUG */ - -#endif /* fil0fil_ic */ -- cgit v1.2.1 From e8b0894dc8d8db630f7faf56c68138d6e39d7cdc Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Tue, 5 Apr 2022 15:47:09 +1000 Subject: MDEV-28243: AIX missing my_gethwaddr implementation and failing spider partition test. With some small datatype changes to the Linux/Solaris my_gethwaddr implementation the hardware address of AIX can be returned. This is an important aspect in Spider (and UUID). Spider test change reviewed by Nayuta Yanagisawa. my_gethwaddr review by Monty in #2081 --- mysys/my_gethwaddr.c | 19 ++++++++++++++----- storage/spider/mysql-test/spider/t/partition_mrr.test | 6 +++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/mysys/my_gethwaddr.c b/mysys/my_gethwaddr.c index 46b62a0166d..0fa4fb2f995 100644 --- a/mysys/my_gethwaddr.c +++ b/mysys/my_gethwaddr.c @@ -23,7 +23,7 @@ #ifndef MAIN -#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__linux__) || defined(__sun) || defined(_WIN32) +#if defined(_AIX) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__linux__) || defined(__sun) || defined(_WIN32) static my_bool memcpy_and_test(uchar *to, uchar *from, uint len) { uint i, res= 1; @@ -74,7 +74,7 @@ err: return res; } -#elif defined(__linux__) || defined(__sun) +#elif defined(_AIX) || defined(__linux__) || defined(__sun) #include #include #include @@ -87,11 +87,15 @@ err: my_bool my_gethwaddr(uchar *to) { int fd, res= 1; +#ifdef _AIX + struct ifhwaddr_req ifr[32]; +#else struct ifreq ifr[32]; +#endif struct ifconf ifc; DBUG_ENTER("my_gethwaddr"); - ifc.ifc_req= ifr; + ifc.ifc_req= (struct ifreq *) ifr; ifc.ifc_len= sizeof(ifr); fd = socket(AF_INET, SOCK_DGRAM, 0); @@ -106,9 +110,14 @@ my_bool 
my_gethwaddr(uchar *to) uint i; for (i= 0; res && i < ifc.ifc_len / sizeof(ifr[0]); i++) { -#ifdef __linux__ +#if !defined(_AIX) || !defined(__linux__) +#if defined(__linux___) +#define HWADDR_DATA ifr[i].ifr_hwaddr.sa_data +#else +#define HWADDR_DATA ifr[i].ifr_hwaddr +#endif if (ioctl(fd, SIOCGIFHWADDR, &ifr[i]) >= 0) - res= memcpy_and_test(to, (uchar *)&ifr[i].ifr_hwaddr.sa_data, + res= memcpy_and_test(to, (uchar *)&HWADDR_DATA, ETHER_ADDR_LEN); #else /* diff --git a/storage/spider/mysql-test/spider/t/partition_mrr.test b/storage/spider/mysql-test/spider/t/partition_mrr.test index 2816d65cadb..710e2781242 100644 --- a/storage/spider/mysql-test/spider/t/partition_mrr.test +++ b/storage/spider/mysql-test/spider/t/partition_mrr.test @@ -179,21 +179,21 @@ if ($USE_CHILD_GROUP2) --connection child2_1 if ($USE_GENERAL_LOG) { ---replace_regex /tmp_spider_bka_0x[0-9a-f]*/tmp_spider_bka_xxxx/ +--replace_regex /tmp_spider_bka_(0x)?[0-9a-f]*/tmp_spider_bka_xxxx/ eval $CHILD2_1_SELECT_ARGUMENT1; } eval $CHILD2_1_SELECT_TABLES; --connection child2_2 if ($USE_GENERAL_LOG) { ---replace_regex /tmp_spider_bka_0x[0-9a-f]*/tmp_spider_bka_xxxx/ +--replace_regex /tmp_spider_bka_(0x)?[0-9a-f]*/tmp_spider_bka_xxxx/ eval $CHILD2_2_SELECT_ARGUMENT1; } eval $CHILD2_2_SELECT_TABLES; --connection child2_3 if ($USE_GENERAL_LOG) { ---replace_regex /tmp_spider_bka_0x[0-9a-f]*/tmp_spider_bka_xxxx/ +--replace_regex /tmp_spider_bka_(0x)?[0-9a-f]*/tmp_spider_bka_xxxx/ eval $CHILD2_3_SELECT_ARGUMENT1; } eval $CHILD2_3_SELECT_TABLES; -- cgit v1.2.1 From 44ab6cba762e23a992e7c3bfd8f4319e56e08fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 8 Jun 2022 14:23:21 +0300 Subject: Cleanup: Remove unused error code DB_FORCED_ABORT MariaDB never supported this form of preemption via high-priority transactions. This error code should not have been added in the first place, in commit 2e814d4702d71a04388386a9f591d14a35980bfe. 
--- storage/innobase/include/db0err.h | 5 +---- storage/innobase/ut/ut0ut.cc | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index f70a65890c9..037821a89e7 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -148,9 +148,6 @@ enum dberr_t { DB_IO_PARTIAL_FAILED, /*!< Partial IO request failed */ - DB_FORCED_ABORT, /*!< Transaction was forced to rollback - by a higher priority transaction */ - DB_TABLE_CORRUPT, /*!< Table/clustered index is corrupted */ diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc index 7f7be193175..8a4e1151101 100644 --- a/storage/innobase/ut/ut0ut.cc +++ b/storage/innobase/ut/ut0ut.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -486,9 +486,6 @@ ut_strerr( return("Table is encrypted but decrypt failed."); case DB_IO_PARTIAL_FAILED: return("Partial IO failed"); - case DB_FORCED_ABORT: - return("Transaction aborted by another higher priority " - "transaction"); case DB_COMPUTE_VALUE_FAILED: return("Compute generated column failed"); case DB_NO_FK_ON_S_BASE_COL: -- cgit v1.2.1 From 77b3959b5c1528f33ada7aa4445cccf5b5e197b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 8 Jun 2022 14:53:24 +0300 Subject: MDEV-28457 Crash in page_dir_find_owner_slot() A prominent remaining source of crashes on corrupted index pages is page directory corruption. A frequent caller of page_dir_find_owner_slot() is page_rec_get_prev(). Some of those calls can be replaced with simpler logic that is less prone to fail. page_dir_find_owner_slot(), page_rec_get_prev(), page_rec_get_prev_const(), btr_pcur_move_to_prev(), btr_pcur_move_to_prev_on_page(), btr_cur_upd_rec_sys(), page_delete_rec_list_end(), rtr_page_copy_rec_list_end_no_locks(), rtr_page_copy_rec_list_start_no_locks(): Return an error code on failure. fil_space_t::io(), buf_page_get_low(): Use DB_CORRUPTION for out-of-bounds page reads. PageBulk::getSplitRec(), PageBulk::copyOut(): Simplify the code. btr_validate_level(): Prevent some more CHECK TABLE crashes on corrupted pages. btr_block_get(), btr_pcur_move_to_next_page(): Implement some checks that were previously only part of IndexPurge::next(). IndexPurge::next(): Use btr_pcur_move_to_next_page(). 
--- storage/innobase/btr/btr0btr.cc | 108 +++++++++++++++++++----------- storage/innobase/btr/btr0bulk.cc | 35 +++++----- storage/innobase/btr/btr0cur.cc | 60 +++++++++++------ storage/innobase/btr/btr0defragment.cc | 8 ++- storage/innobase/btr/btr0pcur.cc | 27 +++++--- storage/innobase/btr/btr0sea.cc | 11 +-- storage/innobase/buf/buf0buf.cc | 3 +- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/gis/gis0rtree.cc | 103 +++++++++------------------- storage/innobase/handler/handler0alter.cc | 8 +-- storage/innobase/ibuf/ibuf0ibuf.cc | 56 +++++++++++----- storage/innobase/include/btr0pcur.h | 3 +- storage/innobase/include/btr0pcur.inl | 7 +- storage/innobase/include/gis0rtree.h | 14 ++-- storage/innobase/include/page0cur.h | 3 +- storage/innobase/include/page0cur.inl | 4 +- storage/innobase/include/page0page.h | 24 ++++--- storage/innobase/include/page0page.inl | 34 +++------- storage/innobase/page/page0cur.cc | 22 ++++-- storage/innobase/page/page0page.cc | 56 ++++++++++------ storage/innobase/row/row0import.cc | 70 +++---------------- storage/innobase/row/row0merge.cc | 51 +++++++++----- storage/innobase/row/row0sel.cc | 20 +++++- 23 files changed, 391 insertions(+), 338 deletions(-) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 72375fc6c43..fc7aad3f84f 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -234,7 +234,9 @@ buf_block_t *btr_block_get(const dict_index_t &index, { if (!!page_is_comp(block->page.frame) != index.table->not_redundant() || btr_page_get_index_id(block->page.frame) != index.id || - !fil_page_index_page_check(block->page.frame)) + !fil_page_index_page_check(block->page.frame) || + index.is_spatial() != + (fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE)) { *err= DB_PAGE_CORRUPTED; block= nullptr; @@ -2716,11 +2718,9 @@ page_move_rec_list_end( ut_ad(new_data_size >= old_data_size); - page_delete_rec_list_end(split_rec, block, index, - new_n_recs - old_n_recs, 
- new_data_size - old_data_size, mtr); - - return DB_SUCCESS; + return page_delete_rec_list_end(split_rec, block, index, + new_n_recs - old_n_recs, + new_data_size - old_data_size, mtr); } /*************************************************************//** @@ -2980,10 +2980,15 @@ insert_empty: page_zip_copy_recs(new_block, page_zip, page, cursor->index, mtr); - page_delete_rec_list_end(move_limit - page + new_page, - new_block, cursor->index, - ULINT_UNDEFINED, - ULINT_UNDEFINED, mtr); + *err = page_delete_rec_list_end(move_limit + - page + new_page, + new_block, + cursor->index, + ULINT_UNDEFINED, + ULINT_UNDEFINED, mtr); + if (*err != DB_SUCCESS) { + return nullptr; + } /* Update the lock table and possible hash index. */ if (cursor->index->has_locking()) { @@ -3045,10 +3050,13 @@ insert_empty: /* Delete the records from the source page. */ - page_delete_rec_list_end(move_limit, block, - cursor->index, - ULINT_UNDEFINED, - ULINT_UNDEFINED, mtr); + *err = page_delete_rec_list_end(move_limit, block, + cursor->index, + ULINT_UNDEFINED, + ULINT_UNDEFINED, mtr); + if (*err != DB_SUCCESS) { + return nullptr; + } } left_block = block; @@ -4699,13 +4707,16 @@ btr_validate_level( default: err = e; } - ut_a(index->table->space_id == block->page.id().space()); - ut_a(block->page.id().space() == page_get_space_id(page)); + ut_ad(index->table->space_id == block->page.id().space()); + ut_ad(block->page.id().space() == page_get_space_id(page)); #ifdef UNIV_ZIP_DEBUG page_zip = buf_block_get_page_zip(block); ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ - ut_a(!page_is_leaf(page)); + if (page_is_leaf(page)) { + err = DB_CORRUPTION; + goto invalid_page; + } page_cur_set_before_first(block, &cursor); page_cur_move_to_next(&cursor); @@ -4833,7 +4844,11 @@ func_exit: err = DB_CORRUPTION; } - rec = page_rec_get_prev(page_get_supremum_rec(page)); + if (!(rec = page_rec_get_prev(page_get_supremum_rec(page)))) { + btr_validate_report1(index, 
level, block); + fputs("InnoDB: broken record links\n", stderr); + goto invalid_page; + } right_rec = page_rec_get_next(page_get_infimum_rec( right_page)); offsets = rec_get_offsets(rec, index, offsets, @@ -4857,10 +4872,12 @@ func_exit: fputs("InnoDB: records in wrong order" " on adjacent pages\n", stderr); - fputs("InnoDB: record ", stderr); rec = page_rec_get_prev(page_get_supremum_rec(page)); - rec_print(stderr, rec, index); - putc('\n', stderr); + if (rec) { + fputs("InnoDB: record ", stderr); + rec_print(stderr, rec, index); + putc('\n', stderr); + } fputs("InnoDB: record ", stderr); rec = page_rec_get_next( page_get_infimum_rec(right_page)); @@ -4905,15 +4922,17 @@ func_exit: rightmost_child = page_rec_is_supremum( page_rec_get_next(node_ptr)); - btr_cur_position( - index, - page_rec_get_prev(page_get_supremum_rec(page)), - block, &node_cur); + rec = page_rec_get_prev(page_get_supremum_rec(page)); + if (rec) { + btr_cur_position(index, rec, block, &node_cur); - offsets = btr_page_get_father_node_ptr_for_validate( + offsets = btr_page_get_father_node_ptr_for_validate( offsets, heap, &node_cur, &mtr); + } else { + offsets = nullptr; + } - if (node_ptr != btr_cur_get_rec(&node_cur) + if (!offsets || node_ptr != btr_cur_get_rec(&node_cur) || btr_node_ptr_get_child_page_no(node_ptr, offsets) != block->page.id().page_no()) { @@ -4925,14 +4944,17 @@ func_exit: fputs("InnoDB: node ptr ", stderr); rec_print(stderr, node_ptr, index); - rec = btr_cur_get_rec(&node_cur); - fprintf(stderr, "\n" - "InnoDB: node ptr child page n:o %u\n", - btr_node_ptr_get_child_page_no(rec, offsets)); + if (offsets) { + rec = btr_cur_get_rec(&node_cur); + fprintf(stderr, "\n" + "InnoDB: node ptr child page n:o %u\n", + btr_node_ptr_get_child_page_no( + rec, offsets)); + fputs("InnoDB: record on page ", stderr); + rec_print_new(stderr, rec, offsets); + putc('\n', stderr); + } - fputs("InnoDB: record on page ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); err = 
DB_CORRUPTION; goto node_ptr_fails; } @@ -4963,15 +4985,21 @@ func_exit: } if (left_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_next( - page_get_infimum_rec(father_page))); - ut_a(!page_has_prev(father_page)); + if (page_has_prev(father_page) + || node_ptr != page_rec_get_next( + page_get_infimum_rec(father_page))) { + err = DB_CORRUPTION; + goto node_ptr_fails; + } } if (right_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_prev( - page_get_supremum_rec(father_page))); - ut_a(!page_has_next(father_page)); + if (page_has_next(father_page) + || node_ptr != page_rec_get_prev( + page_get_supremum_rec(father_page))) { + err = DB_CORRUPTION; + goto node_ptr_fails; + } } else { const rec_t* right_node_ptr; diff --git a/storage/innobase/btr/btr0bulk.cc b/storage/innobase/btr/btr0bulk.cc index 538cb06e654..7af5f862aab 100644 --- a/storage/innobase/btr/btr0bulk.cc +++ b/storage/innobase/btr/btr0bulk.cc @@ -637,7 +637,7 @@ PageBulk::getSplitRec() < total_used_size / 2); /* Keep at least one record on left page */ - if (page_rec_is_infimum(page_rec_get_prev(rec))) { + if (page_rec_is_second(rec, m_page)) { rec = page_rec_get_next(rec); ut_ad(page_rec_is_user_rec(rec)); } @@ -679,35 +679,40 @@ void PageBulk::copyOut( rec_t* split_rec) { - rec_t* rec; - rec_t* last_rec; - ulint n; - /* Suppose before copyOut, we have 5 records on the page: infimum->r1->r2->r3->r4->r5->supremum, and r3 is the split rec. after copyOut, we have 2 records on the page: infimum->r1->r2->supremum. slot ajustment is not done. 
*/ - rec = page_rec_get_next(page_get_infimum_rec(m_page)); - last_rec = page_rec_get_prev(page_get_supremum_rec(m_page)); - n = 0; + rec_t *rec = page_get_infimum_rec(m_page); + ulint n; - while (rec != split_rec) { - rec = page_rec_get_next(rec); - n++; + for (n = 0;; n++) { + rec_t *next = page_rec_get_next(rec); + if (next == split_rec) { + break; + } + rec = next; } ut_ad(n > 0); + const rec_t *last_rec = split_rec; + for (;;) { + const rec_t *next = page_rec_get_next_const(last_rec); + if (page_rec_is_supremum(next)) { + break; + } + last_rec = next; + } + /* Set last record's next in page */ - rec_offs* offsets = NULL; - rec = page_rec_get_prev(split_rec); const ulint n_core = page_rec_is_leaf(split_rec) ? m_index->n_core_fields : 0; - offsets = rec_get_offsets(rec, m_index, offsets, n_core, - ULINT_UNDEFINED, &m_heap); + rec_offs* offsets = rec_get_offsets(rec, m_index, nullptr, n_core, + ULINT_UNDEFINED, &m_heap); mach_write_to_2(rec - REC_NEXT, m_is_comp ? static_cast (PAGE_NEW_SUPREMUM - page_offset(rec)) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 3499e9d84fe..981a80adba0 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -2132,12 +2132,10 @@ need_opposite_intention: if (matched_fields >= rec_offs_n_fields(offsets) - 1) { detected_same_key_root = true; - } else { - const rec_t* last_rec; - - last_rec = page_rec_get_prev_const( - page_get_supremum_rec(page)); - + } else if (const rec_t* last_rec + = page_rec_get_prev_const( + page_get_supremum_rec( + page))) { matched_fields = 0; offsets2 = rec_get_offsets( @@ -2151,6 +2149,9 @@ need_opposite_intention: >= rec_offs_n_fields(offsets) - 1) { detected_same_key_root = true; } + } else { + err = DB_CORRUPTION; + goto func_exit; } } } @@ -2710,7 +2711,10 @@ btr_cur_open_at_index_side( if (from_left) { page_cur_move_to_next(page_cursor); } else { - page_cur_move_to_prev(page_cursor); + if (!page_cur_move_to_prev(page_cursor)) { + err = 
DB_CORRUPTION; + goto exit_loop; + } } if (estimate) { @@ -2800,7 +2804,7 @@ btr_cur_open_at_index_side( } exit_loop: - if (heap) { + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } @@ -3845,6 +3849,7 @@ static void btr_cur_write_sys( trx_write_roll_ptr(static_cast(r->data), roll_ptr); } +MY_ATTRIBUTE((warn_unused_result)) /** Update DB_TRX_ID, DB_ROLL_PTR in a clustered index record. @param[in,out] block clustered index leaf page @param[in,out] rec clustered index record @@ -3852,11 +3857,12 @@ static void btr_cur_write_sys( @param[in] offsets rec_get_offsets(rec, index) @param[in] trx transaction @param[in] roll_ptr DB_ROLL_PTR value -@param[in,out] mtr mini-transaction */ -static void btr_cur_upd_rec_sys(buf_block_t *block, rec_t *rec, - dict_index_t *index, const rec_offs *offsets, - const trx_t *trx, roll_ptr_t roll_ptr, - mtr_t *mtr) +@param[in,out] mtr mini-transaction +@return error code */ +static dberr_t btr_cur_upd_rec_sys(buf_block_t *block, rec_t *rec, + dict_index_t *index, const rec_offs *offsets, + const trx_t *trx, roll_ptr_t roll_ptr, + mtr_t *mtr) { ut_ad(index->is_primary()); ut_ad(rec_offs_validate(rec, index, offsets)); @@ -3865,7 +3871,7 @@ static void btr_cur_upd_rec_sys(buf_block_t *block, rec_t *rec, { page_zip_write_trx_id_and_roll_ptr(block, rec, offsets, index->db_trx_id(), trx->id, roll_ptr, mtr); - return; + return DB_SUCCESS; } ulint offset= index->trx_id_offset; @@ -3895,8 +3901,8 @@ static void btr_cur_upd_rec_sys(buf_block_t *block, rec_t *rec, if (UNIV_LIKELY(index->trx_id_offset)) { const rec_t *prev= page_rec_get_prev_const(rec); - if (UNIV_UNLIKELY(prev == rec)) - ut_ad(0); + if (UNIV_UNLIKELY(!prev || prev == rec)) + return DB_CORRUPTION; else if (page_rec_is_infimum(prev)); else for (src= prev + offset; d < DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; d++) @@ -3934,6 +3940,8 @@ static void btr_cur_upd_rec_sys(buf_block_t *block, rec_t *rec, if (UNIV_LIKELY(len)) /* extra safety, to avoid corrupting the log */ 
mtr->memcpy(*block, dest, sys + d, len); + + return DB_SUCCESS; } /*************************************************************//** @@ -4239,8 +4247,11 @@ btr_cur_update_in_place( } if (!(flags & BTR_KEEP_SYS_FLAG)) { - btr_cur_upd_rec_sys(block, rec, index, offsets, - thr_get_trx(thr), roll_ptr, mtr); + err = btr_cur_upd_rec_sys(block, rec, index, offsets, + thr_get_trx(thr), roll_ptr, mtr); + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto func_exit; + } } was_delete_marked = rec_get_deleted_flag( @@ -4694,7 +4705,9 @@ any_extern: page_cur_delete_rec(page_cursor, index, *offsets, mtr); - page_cur_move_to_prev(page_cursor); + if (!page_cur_move_to_prev(page_cursor)) { + return DB_CORRUPTION; + } if (!(flags & BTR_KEEP_SYS_FLAG)) { btr_cur_write_sys(new_entry, index, trx_id, roll_ptr); @@ -5056,7 +5069,10 @@ btr_cur_pessimistic_update( page_cur_delete_rec(page_cursor, index, *offsets, mtr); - page_cur_move_to_prev(page_cursor); + if (!page_cur_move_to_prev(page_cursor)) { + err = DB_CORRUPTION; + goto return_after_reservations; + } rec = btr_cur_insert_if_possible(cursor, new_entry, offsets, offsets_heap, n_ext, mtr); @@ -5354,8 +5370,8 @@ btr_cur_del_mark_set_clust_rec( << ib::hex(trx->id) << ": " << rec_printer(rec, offsets).str()); - btr_cur_upd_rec_sys(block, rec, index, offsets, trx, roll_ptr, mtr); - return(err); + return btr_cur_upd_rec_sys(block, rec, index, offsets, trx, roll_ptr, + mtr); } /*==================== B-TREE RECORD REMOVE =========================*/ diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 23d93caecf5..5a278fbd9a2 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc @@ -707,9 +707,11 @@ processed: page_t* last_page = buf_block_get_frame(last_block); rec_t* rec = page_rec_get_prev( page_get_supremum_rec(last_page)); - ut_a(page_rec_is_user_rec(rec)); - page_cur_position(rec, last_block, - btr_pcur_get_page_cur(item->pcur)); + if (rec && 
page_rec_is_user_rec(rec)) { + page_cur_position(rec, last_block, + btr_pcur_get_page_cur( + item->pcur)); + } btr_pcur_store_position(item->pcur, &mtr); mtr_commit(&mtr); /* Update the last_processed time of this index. */ diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index 8c33fee9e61..c45ce3be89e 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -148,6 +148,11 @@ before_first: if (page_rec_is_supremum_low(offs)) { rec = page_rec_get_prev(rec); + if (UNIV_UNLIKELY(!rec || page_rec_is_infimum(rec))) { + ut_ad("corrupted index" == 0); + cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE; + return; + } ut_ad(!page_rec_is_infimum(rec)); if (UNIV_UNLIKELY(rec_is_metadata(rec, *index))) { @@ -486,7 +491,17 @@ btr_pcur_move_to_next_page( const page_t* page = btr_pcur_get_page(cursor); const uint32_t next_page_no = btr_page_get_next(page); - ut_ad(next_page_no != FIL_NULL); + switch (next_page_no) { + case 0: + case 1: + case FIL_NULL: + return DB_CORRUPTION; + } + + if (UNIV_UNLIKELY(next_page_no == btr_pcur_get_block(cursor) + ->page.id().page_no())) { + return DB_CORRUPTION; + } ulint mode = cursor->latch_mode; switch (mode) { @@ -599,13 +614,9 @@ btr_pcur_move_to_prev( cursor->old_stored = false; if (btr_pcur_is_before_first_on_page(cursor)) { - if (btr_pcur_is_before_first_in_tree(cursor) - || btr_pcur_move_backward_from_page(cursor, mtr)) { - return false; - } - } else { - btr_pcur_move_to_prev_on_page(cursor); + return (!btr_pcur_is_before_first_in_tree(cursor) + && !btr_pcur_move_backward_from_page(cursor, mtr)); } - return true; + return btr_pcur_move_to_prev_on_page(cursor) != nullptr; } diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index a9b4e9ff0d0..53cc464f18f 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -781,7 +781,7 @@ btr_search_check_guess( mem_heap_t* heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs* 
offsets = offsets_; - ibool success = FALSE; + bool success = false; rec_offs_init(offsets_); n_unique = dict_index_get_n_unique_in_tree(cursor->index); @@ -806,7 +806,7 @@ btr_search_check_guess( cursor->up_match = match; if (match >= n_unique) { - success = TRUE; + success = true; goto exit_func; } } else if (mode == PAGE_CUR_LE) { @@ -835,10 +835,13 @@ btr_search_check_guess( match = 0; if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) { - ut_ad(!page_rec_is_infimum(rec)); - const rec_t* prev_rec = page_rec_get_prev(rec); + if (UNIV_UNLIKELY(!prev_rec)) { + ut_ad("corrupted index" == 0); + goto exit_func; + } + if (page_rec_is_infimum(prev_rec)) { success = !page_has_prev(page_align(prev_rec)); goto exit_func; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index e6a6bd8cbb7..e0632513584 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2587,7 +2587,8 @@ loop: checksum cannot be decypted. */ if (dberr_t local_err = buf_read_page(page_id, zip_size)) { - if (mode != BUF_GET_POSSIBLY_FREED + if (local_err != DB_CORRUPTION + && mode != BUF_GET_POSSIBLY_FREED && retries++ < BUF_PAGE_READ_MAX_RETRIES) { DBUG_EXECUTE_IF("intermittent_read_failure", retries = BUF_PAGE_READ_MAX_RETRIES;); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index de94ec5f0c1..18f68aa5dd6 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2860,7 +2860,7 @@ fail: io_error: #endif set_corrupted(); - err = DB_IO_ERROR; + err = DB_CORRUPTION; node = nullptr; goto release; } diff --git a/storage/innobase/gis/gis0rtree.cc b/storage/innobase/gis/gis0rtree.cc index 8a5e1dc3ac9..df5d6aa1156 100644 --- a/storage/innobase/gis/gis0rtree.cc +++ b/storage/innobase/gis/gis0rtree.cc @@ -718,7 +718,6 @@ rtr_split_page_move_rec_list( page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); rec_t* rec; - rec_t* ret; ulint moved = 0; ulint max_to_move = 0; 
rtr_rec_move_t* rec_move = NULL; @@ -733,7 +732,6 @@ rtr_split_page_move_rec_list( page = buf_block_get_frame(block); new_page = buf_block_get_frame(new_block); - ret = page_rec_get_prev(page_get_supremum_rec(new_page)); end_split_node = node_array + page_get_n_recs(page); @@ -804,32 +802,15 @@ rtr_split_page_move_rec_list( if (!page_zip_compress(new_block, index, page_zip_level, mtr)) { - /* Before trying to reorganize the page, - store the number of preceding records on the page. */ - ulint ret_pos = page_rec_get_n_recs_before(ret); - /* Before copying, "ret" was the predecessor - of the predefined supremum record. If it was - the predefined infimum record, then it would - still be the infimum, and we would have - ret_pos == 0. */ - - switch (dberr_t err = + if (dberr_t err = page_zip_reorganize(new_block, index, page_zip_level, mtr)) { - case DB_FAIL: - if (UNIV_UNLIKELY - (!page_zip_decompress(new_page_zip, - new_page, FALSE))) { - ut_error; + if (err == DB_FAIL) { + ut_a(page_zip_decompress(new_page_zip, + new_page, + FALSE)); } -#ifdef UNIV_GIS_DEBUG - ut_ad(page_validate(new_page, index)); -#endif - /* fall through */ - default: return err; - case DB_SUCCESS: - ret = page_rec_get_nth(new_page, ret_pos); } } } @@ -1284,14 +1265,9 @@ rtr_ins_enlarge_mbr( /*************************************************************//** Copy recs from a page to new_block of rtree. -Differs from page_copy_rec_list_end, because this function does not -touch the lock table and max trx id on page or compress the page. -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
*/ -void +@return error code */ +dberr_t rtr_page_copy_rec_list_end_no_locks( /*================================*/ buf_block_t* new_block, /*!< in: index page to copy to */ @@ -1355,8 +1331,7 @@ rtr_page_copy_rec_list_end_no_locks( offsets1, offsets2, index, false, &cur_matched_fields); if (cmp < 0) { - page_cur_move_to_prev(&page_cur); - break; + goto move_to_prev; } else if (cmp > 0) { /* Skip small recs. */ page_cur_move_to_next(&page_cur); @@ -1379,26 +1354,23 @@ rtr_page_copy_rec_list_end_no_locks( /* If position is on suprenum rec, need to move to previous rec. */ if (page_rec_is_supremum(cur_rec)) { - page_cur_move_to_prev(&page_cur); +move_to_prev: + cur_rec = page_cur_move_to_prev(&page_cur); + } else { + cur_rec = page_cur_get_rec(&page_cur); } - cur_rec = page_cur_get_rec(&page_cur); + if (UNIV_UNLIKELY(!cur_rec)) { + return DB_CORRUPTION; + } offsets1 = rec_get_offsets(cur1_rec, index, offsets1, n_core, ULINT_UNDEFINED, &heap); ins_rec = page_cur_insert_rec_low(&page_cur, index, cur1_rec, offsets1, mtr); - if (UNIV_UNLIKELY(!ins_rec)) { - fprintf(stderr, "page number %u and %u\n", - new_block->page.id().page_no(), - block->page.id().page_no()); - - ib::fatal() << "rec offset " << page_offset(rec) - << ", cur1 offset " - << page_offset(page_cur_get_rec(&cur1)) - << ", cur_rec offset " - << page_offset(cur_rec); + if (UNIV_UNLIKELY(!ins_rec || moved >= max_move)) { + return DB_CORRUPTION; } rec_move[moved].new_rec = ins_rec; @@ -1406,20 +1378,18 @@ rtr_page_copy_rec_list_end_no_locks( rec_move[moved].moved = false; moved++; next: - if (moved > max_move) { - ut_ad(0); - break; - } - page_cur_move_to_next(&cur1); } *num_moved = moved; + return DB_SUCCESS; } /*************************************************************//** -Copy recs till a specified rec from a page to new_block of rtree. */ -void +Copy recs till a specified rec from a page to new_block of rtree. 
+ +@return error code */ +dberr_t rtr_page_copy_rec_list_start_no_locks( /*==================================*/ buf_block_t* new_block, /*!< in: index page to copy to */ @@ -1474,9 +1444,7 @@ rtr_page_copy_rec_list_start_no_locks( offsets1, offsets2, index, false, &cur_matched_fields); if (cmp < 0) { - page_cur_move_to_prev(&page_cur); - cur_rec = page_cur_get_rec(&page_cur); - break; + goto move_to_prev; } else if (cmp > 0) { /* Skip small recs. */ page_cur_move_to_next(&page_cur); @@ -1500,23 +1468,22 @@ rtr_page_copy_rec_list_start_no_locks( /* If position is on suprenum rec, need to move to previous rec. */ if (page_rec_is_supremum(cur_rec)) { - page_cur_move_to_prev(&page_cur); +move_to_prev: + cur_rec = page_cur_move_to_prev(&page_cur); + if (UNIV_UNLIKELY(!cur_rec)) { + return DB_CORRUPTION; + } + } else { + cur_rec = page_cur_get_rec(&page_cur); } - cur_rec = page_cur_get_rec(&page_cur); - offsets1 = rec_get_offsets(cur1_rec, index, offsets1, n_core, ULINT_UNDEFINED, &heap); ins_rec = page_cur_insert_rec_low(&page_cur, index, cur1_rec, offsets1, mtr); - if (UNIV_UNLIKELY(!ins_rec)) { - ib::fatal() << new_block->page.id() - << "rec offset " << page_offset(rec) - << ", cur1 offset " - << page_offset(page_cur_get_rec(&cur1)) - << ", cur_rec offset " - << page_offset(cur_rec); + if (UNIV_UNLIKELY(!ins_rec || moved >= max_move)) { + return DB_CORRUPTION; } rec_move[moved].new_rec = ins_rec; @@ -1524,15 +1491,11 @@ rtr_page_copy_rec_list_start_no_locks( rec_move[moved].moved = false; moved++; next: - if (moved > max_move) { - ut_ad(0); - break; - } - page_cur_move_to_next(&cur1); } *num_moved = moved; + return DB_SUCCESS; } /****************************************************************//** diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index d63f5b33654..57eecd47ebb 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1993,7 +1993,7 @@ static bool 
innobase_table_is_empty(const dict_table_t *table, btr_pcur_t pcur; buf_block_t *block; page_cur_t *cur; - const rec_t *rec; + rec_t *rec; bool next_page= false; mtr.start(); @@ -2004,9 +2004,9 @@ non_empty: mtr.commit(); return false; } - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - if (!rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index)) - btr_pcur_move_to_prev_on_page(&pcur); + rec= page_rec_get_next(btr_pcur_get_rec(&pcur)); + if (rec_is_metadata(rec, *clust_index)) + btr_pcur_get_page_cur(&pcur)->rec= rec; scan_leaf: cur= btr_pcur_get_page_cur(&pcur); page_cur_move_to_next(cur); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 94c01f5108a..6fb41cfbd81 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -2035,23 +2035,25 @@ ibuf_get_merge_page_nos_func( *n_stored = 0; - limit = ut_min(IBUF_MAX_N_PAGES_MERGED, - buf_pool_get_curr_size() / 4); - if (page_rec_is_supremum(rec)) { rec = page_rec_get_prev_const(rec); + if (UNIV_UNLIKELY(!rec)) { +corruption: + ut_ad("corrupted page" == 0); + return 0; + } } if (page_rec_is_infimum(rec)) { - rec = page_rec_get_next_const(rec); + if (page_rec_is_supremum(rec)) { + return 0; + } } - if (page_rec_is_supremum(rec)) { - - return(0); - } + limit = ut_min(IBUF_MAX_N_PAGES_MERGED, + buf_pool_get_curr_size() / 4); first_page_no = ibuf_rec_get_page_no(mtr, rec); first_space_id = ibuf_rec_get_space(mtr, rec); @@ -2083,7 +2085,9 @@ ibuf_get_merge_page_nos_func( prev_page_no = rec_page_no; prev_space_id = rec_space_id; - rec = page_rec_get_prev_const(rec); + if (UNIV_UNLIKELY(!(rec = page_rec_get_prev_const(rec)))) { + goto corruption; + } } rec = page_rec_get_next_const(rec); @@ -2809,14 +2813,16 @@ ibuf_get_volume_buffered( page = page_align(rec); ut_ad(page_validate(page, ibuf.index)); - if (page_rec_is_supremum(rec)) { - rec = page_rec_get_prev_const(rec); + if (page_rec_is_supremum(rec) + && UNIV_UNLIKELY(!(rec = 
page_rec_get_prev_const(rec)))) { +corruption: + ut_ad("corrupted page" == 0); + return srv_page_size; } uint32_t prev_page_no; - for (; !page_rec_is_infimum(rec); - rec = page_rec_get_prev_const(rec)) { + for (; !page_rec_is_infimum(rec); ) { ut_ad(page_align(rec) == page); if (page_no != ibuf_rec_get_page_no(mtr, rec) @@ -2828,6 +2834,10 @@ ibuf_get_volume_buffered( volume += ibuf_get_volume_buffered_count( mtr, rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); + + if (UNIV_UNLIKELY(!(rec = page_rec_get_prev_const(rec)))) { + goto corruption; + } } /* Look at the previous page */ @@ -2853,13 +2863,16 @@ ibuf_get_volume_buffered( if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_page + FIL_PAGE_NEXT, page + FIL_PAGE_OFFSET, 4))) { - return 0; + return srv_page_size; } - rec = page_get_supremum_rec(prev_page); - rec = page_rec_get_prev_const(rec); + rec = page_rec_get_prev_const(page_get_supremum_rec(prev_page)); - for (;; rec = page_rec_get_prev_const(rec)) { + if (UNIV_UNLIKELY(!rec)) { + goto corruption; + } + + for (;;) { ut_ad(page_align(rec) == prev_page); if (page_rec_is_infimum(rec)) { @@ -2880,6 +2893,10 @@ ibuf_get_volume_buffered( volume += ibuf_get_volume_buffered_count( mtr, rec, hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); + + if (UNIV_UNLIKELY(!(rec = page_rec_get_prev_const(rec)))) { + goto corruption; + } } count_later: @@ -3801,7 +3818,10 @@ ibuf_insert_to_index_page( buffered one. 
*/ page_cur_delete_rec(&page_cur, index, offsets, mtr); - page_cur_move_to_prev(&page_cur); + if (!(page_cur_move_to_prev(&page_cur))) { + err = DB_CORRUPTION; + goto updated_in_place; + } } else { offsets = NULL; } diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index 5fc4e28527f..f2a1f4220da 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -331,10 +331,11 @@ void btr_pcur_move_to_next_on_page( /*==========================*/ btr_pcur_t* cursor);/*!< in/out: persistent cursor */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /*********************************************************//** Moves the persistent cursor to the previous record on the same page. */ UNIV_INLINE -void +rec_t* btr_pcur_move_to_prev_on_page( /*==========================*/ btr_pcur_t* cursor);/*!< in/out: persistent cursor */ diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl index fd4eeb9392a..b21de209760 100644 --- a/storage/innobase/include/btr0pcur.inl +++ b/storage/innobase/include/btr0pcur.inl @@ -171,17 +171,16 @@ btr_pcur_move_to_next_on_page( /*********************************************************//** Moves the persistent cursor to the previous record on the same page. 
*/ UNIV_INLINE -void +rec_t* btr_pcur_move_to_prev_on_page( /*==========================*/ btr_pcur_t* cursor) /*!< in/out: persistent cursor */ { ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); - cursor->old_stored = false; + + return page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); } /*********************************************************//** diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h index 4e10b90173e..8cd5e384530 100644 --- a/storage/innobase/include/gis0rtree.h +++ b/storage/innobase/include/gis0rtree.h @@ -337,9 +337,12 @@ rtr_get_parent_cursor( ulint level, /*!< in: index level of buffer page */ ulint is_insert); /*!< in: whether insert operation */ +MY_ATTRIBUTE((warn_unused_result)) /*************************************************************//** -Copy recs from a page to new_block of rtree. */ -void +Copy recs from a page to new_block of rtree. + +@return error code */ +dberr_t rtr_page_copy_rec_list_end_no_locks( /*================================*/ buf_block_t* new_block, /*!< in: index page to copy to */ @@ -352,9 +355,12 @@ rtr_page_copy_rec_list_end_no_locks( ulint* num_moved, /*!< out: num of rec to move */ mtr_t* mtr); /*!< in: mtr */ +MY_ATTRIBUTE((warn_unused_result)) /*************************************************************//** -Copy recs till a specified rec from a page to new_block of rtree. */ -void +Copy recs till a specified rec from a page to new_block of rtree. 
+ +@return error code */ +dberr_t rtr_page_copy_rec_list_start_no_locks( /*==================================*/ buf_block_t* new_block, /*!< in: index page to copy to */ diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h index d80eb4567e5..e715df19741 100644 --- a/storage/innobase/include/page0cur.h +++ b/storage/innobase/include/page0cur.h @@ -120,10 +120,11 @@ void page_cur_move_to_next( /*==================*/ page_cur_t* cur); /*!< in/out: cursor; must not be after last */ +MY_ATTRIBUTE((nonnull, warn_unused_result)) /**********************************************************//** Moves the cursor to the previous record on page. */ UNIV_INLINE -void +rec_t* page_cur_move_to_prev( /*==================*/ page_cur_t* cur); /*!< in/out: cursor; not before first */ diff --git a/storage/innobase/include/page0cur.inl b/storage/innobase/include/page0cur.inl index 5ee96dd716d..6f7c633561f 100644 --- a/storage/innobase/include/page0cur.inl +++ b/storage/innobase/include/page0cur.inl @@ -168,14 +168,14 @@ page_cur_move_to_next( /**********************************************************//** Moves the cursor to the previous record on page. */ UNIV_INLINE -void +rec_t* page_cur_move_to_prev( /*==================*/ page_cur_t* cur) /*!< in/out: page cursor, not before first */ { ut_ad(!page_cur_is_before_first(cur)); - cur->rec = page_rec_get_prev(cur->rec); + return cur->rec = page_rec_get_prev(cur->rec); } /** Search the right position for a page cursor. diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index a4a86791cd8..4787ce36c7a 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -669,7 +669,8 @@ page_dir_calc_reserved_space( ulint n_recs); /*!< in: number of records */ /***************************************************************//** Looks for the directory slot which owns the given record. 
-@return the directory slot number +@return the directory slot number +@retval ULINT_UNDEFINED on corruption */ ulint page_dir_find_owner_slot( /*=====================*/ @@ -763,7 +764,8 @@ page_rec_get_next_non_del_marked( const rec_t* rec); /*!< in: pointer to record */ /************************************************************//** Gets the pointer to the previous record. -@return pointer to previous record */ +@return pointer to previous record +@retval nullptr on error */ UNIV_INLINE const rec_t* page_rec_get_prev_const( @@ -772,13 +774,13 @@ page_rec_get_prev_const( infimum */ /************************************************************//** Gets the pointer to the previous record. -@return pointer to previous record */ -UNIV_INLINE -rec_t* -page_rec_get_prev( -/*==============*/ - rec_t* rec); /*!< in: pointer to record, - must not be page infimum */ +@param rec record (not page infimum) +@return pointer to previous record +@retval nullptr on error */ +inline rec_t *page_rec_get_prev(rec_t *rec) +{ + return const_cast<rec_t*>(page_rec_get_prev_const(rec)); +} /************************************************************//** true if the record is the first user record on a page. @@ -997,7 +999,7 @@ page_copy_rec_list_start( /*************************************************************//** Deletes records from a page from a given record onward, including that record. The infimum and supremum records are not deleted. */ -void +dberr_t page_delete_rec_list_end( /*=====================*/ rec_t* rec, /*!< in: pointer to record on page */ @@ -1009,7 +1011,7 @@ page_delete_rec_list_end( records in the end of the chain to delete, or ULINT_UNDEFINED if not known */ mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*************************************************************//** Deletes records from page, up to the given record, NOT including that record. Infimum and supremum records are not deleted.
*/ diff --git a/storage/innobase/include/page0page.inl b/storage/innobase/include/page0page.inl index 861bf4a53df..76bc62e5eb2 100644 --- a/storage/innobase/include/page0page.inl +++ b/storage/innobase/include/page0page.inl @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,9 +24,6 @@ Index page routines Created 2/2/1994 Heikki Tuuri *******************************************************/ -#ifndef page0page_ic -#define page0page_ic - #ifndef UNIV_INNOCHECKSUM #include "rem0cmp.h" #include "mtr0log.h" @@ -506,7 +503,8 @@ page_rec_get_next_non_del_marked( /************************************************************//** Gets the pointer to the previous record. -@return pointer to previous record */ +@return pointer to previous record +@retval nullptr on error */ UNIV_INLINE const rec_t* page_rec_get_prev_const( @@ -528,42 +526,28 @@ page_rec_get_prev_const( slot_no = page_dir_find_owner_slot(rec); - ut_a(slot_no != 0); + if (UNIV_UNLIKELY(!slot_no || slot_no == ULINT_UNDEFINED)) { + return nullptr; + } slot = page_dir_get_nth_slot(page, slot_no - 1); rec2 = page_dir_slot_get_rec(slot); if (page_is_comp(page)) { - while (rec != rec2) { + while (rec2 && rec != rec2) { prev_rec = rec2; rec2 = page_rec_get_next_low(rec2, TRUE); } } else { - while (rec != rec2) { + while (rec2 && rec != rec2) { prev_rec = rec2; rec2 = page_rec_get_next_low(rec2, FALSE); } } - ut_a(prev_rec); - return(prev_rec); } - -/************************************************************//** -Gets the pointer to the previous record. 
-@return pointer to previous record */ -UNIV_INLINE -rec_t* -page_rec_get_prev( -/*==============*/ - rec_t* rec) /*!< in: pointer to record, must not be page - infimum */ -{ - return((rec_t*) page_rec_get_prev_const(rec)); -} - #endif /* UNIV_INNOCHECKSUM */ /************************************************************//** @@ -720,5 +704,3 @@ page_get_instant(const page_t* page) return static_cast(i >> 3); /* i / 8 */ } #endif /* !UNIV_INNOCHECKSUM */ - -#endif diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index bd0905e80dc..e608abaa361 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -1621,7 +1621,9 @@ copied: if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) { - const auto owner= page_dir_find_owner_slot(next_rec); + const ulint owner= page_dir_find_owner_slot(next_rec); + if (UNIV_UNLIKELY(owner == ULINT_UNDEFINED)) + return nullptr; page_dir_split_slot(*block, page_dir_get_nth_slot(block->page.frame, owner)); } @@ -2047,8 +2049,12 @@ inc_dir: record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, we have to split the corresponding directory slot in two. 
*/ if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) - page_zip_dir_split_slot(cursor->block, - page_dir_find_owner_slot(next_rec), mtr); + { + const ulint owner= page_dir_find_owner_slot(next_rec); + if (UNIV_UNLIKELY(owner == ULINT_UNDEFINED)) + return nullptr; + page_zip_dir_split_slot(cursor->block, owner, mtr); + } page_zip_write_rec(cursor->block, insert_rec, index, offsets, 1, mtr); return insert_rec; @@ -2144,7 +2150,6 @@ page_cur_delete_rec( rec_t* current_rec; rec_t* prev_rec = NULL; rec_t* next_rec; - ulint cur_slot_no; ulint cur_n_owned; rec_t* rec; @@ -2188,8 +2193,13 @@ page_cur_delete_rec( } /* Save to local variables some data associated with current_rec */ - cur_slot_no = page_dir_find_owner_slot(current_rec); - ut_ad(cur_slot_no > 0); + ulint cur_slot_no = page_dir_find_owner_slot(current_rec); + + if (UNIV_UNLIKELY(!cur_slot_no || cur_slot_no == ULINT_UNDEFINED)) { + /* Avoid crashing due to a corrupted page. */ + return; + } + cur_dir_slot = page_dir_get_nth_slot(block->page.frame, cur_slot_no); cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index bb2c267c633..22d3983072b 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -82,7 +82,8 @@ is 50 x 4 bytes = 200 bytes. */ /***************************************************************//** Looks for the directory slot which owns the given record. -@return the directory slot number */ +@return the directory slot number +@retval ULINT_UNDEFINED on corruption */ ulint page_dir_find_owner_slot( /*=====================*/ @@ -135,7 +136,7 @@ page_dir_find_owner_slot( + mach_decode_2(rec_offs_bytes)); } - ut_error; + return ULINT_UNDEFINED; } slot += PAGE_DIR_SLOT_SIZE; @@ -589,12 +590,12 @@ page_copy_rec_list_end( /* For spatial index, we need to insert recs one by one to keep recs ordered. 
*/ - rtr_page_copy_rec_list_end_no_locks(new_block, - block, rec, index, - heap, rec_move, - max_to_move, - &num_moved, - mtr); + *err = rtr_page_copy_rec_list_end_no_locks(new_block, + block, rec, index, + heap, rec_move, + max_to_move, + &num_moved, + mtr); } else { *err = page_copy_rec_list_end_no_locks(new_block, block, rec, index, mtr); @@ -719,6 +720,11 @@ page_copy_rec_list_start( rec_offs* offsets = offsets_; rec_offs_init(offsets_); + if (UNIV_UNLIKELY(!ret)) { + *err = DB_CORRUPTION; + return ret; + } + /* Here, "ret" may be pointing to a user record or the predefined infimum record. */ @@ -753,10 +759,14 @@ page_copy_rec_list_start( /* For spatial index, we need to insert recs one by one to keep recs ordered. */ - rtr_page_copy_rec_list_start_no_locks(new_block, - block, rec, index, heap, - rec_move, max_to_move, - &num_moved, mtr); + *err = rtr_page_copy_rec_list_start_no_locks(new_block, + block, rec, index, + heap, rec_move, + max_to_move, + &num_moved, mtr); + if (*err != DB_SUCCESS) { + return nullptr; + } } else { while (page_cur_get_rec(&cur1) != rec) { offsets = rec_get_offsets(cur1.rec, index, offsets, @@ -857,7 +867,7 @@ zip_reorganize: /*************************************************************//** Deletes records from a page from a given record onward, including that record. The infimum and supremum records are not deleted. */ -void +dberr_t page_delete_rec_list_end( /*=====================*/ rec_t* rec, /*!< in: pointer to record on page */ @@ -884,7 +894,7 @@ page_delete_rec_list_end( { ut_ad(n_recs == 0 || n_recs == ULINT_UNDEFINED); /* Nothing to do, there are no records bigger than the page supremum. */ - return; + return DB_SUCCESS; } if (page_rec_is_infimum(rec) || @@ -895,7 +905,7 @@ page_delete_rec_list_end( { /* We are deleting all records. 
*/ page_create_empty(block, index, mtr); - return; + return DB_SUCCESS; } #if 0 // FIXME: consider deleting the last record as a special case @@ -903,7 +913,7 @@ page_delete_rec_list_end( { page_cur_t cursor= { index, rec, offsets, block }; page_cur_delete_rec(&cursor, index, offsets, mtr); - return; + return DB_SUCCESS; } #endif @@ -936,12 +946,16 @@ page_delete_rec_list_end( if (UNIV_LIKELY_NULL(heap)) mem_heap_free(heap); - return; + return DB_SUCCESS; } #endif byte *prev_rec= page_rec_get_prev(rec); + if (UNIV_UNLIKELY(!prev_rec)) + return DB_CORRUPTION; byte *last_rec= page_rec_get_prev(page_get_supremum_rec(page)); + if (UNIV_UNLIKELY(!last_rec)) + return DB_CORRUPTION; // FIXME: consider a special case of shrinking PAGE_HEAP_TOP @@ -998,9 +1012,11 @@ page_delete_rec_list_end( ut_ad(n_owned > count); n_owned-= count; slot_index= page_dir_find_owner_slot(owner_rec); - ut_ad(slot_index > 0); } + if (UNIV_UNLIKELY(!slot_index || slot_index == ULINT_UNDEFINED)) + return DB_CORRUPTION; + mtr->write<2,mtr_t::MAYBE_NOP>(*block, my_assume_aligned<2> (PAGE_N_DIR_SLOTS + PAGE_HEADER + page), slot_index + 1); @@ -1046,7 +1062,7 @@ page_delete_rec_list_end( mach_write_to_2(last_rec - REC_NEXT, free ? 
static_cast(free - page_offset(last_rec)) : 0U); - return; + return DB_SUCCESS; } #endif mtr->write<1,mtr_t::MAYBE_NOP>(*block, owned, new_owned); @@ -1066,6 +1082,8 @@ page_delete_rec_list_end( mtr->write<2>(*block, prev_rec - REC_NEXT, PAGE_OLD_SUPREMUM); mtr->write<2>(*block, last_rec - REC_NEXT, free); } + + return DB_SUCCESS; } /*************************************************************//** diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 96202311198..9f1bcbb820e 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1531,15 +1531,10 @@ inline bool IndexPurge::open() noexcept &m_pcur, true, 0, &m_mtr) != DB_SUCCESS) return false; - btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr); - if (rec_is_metadata(btr_pcur_get_rec(&m_pcur), *m_index)) - { - if (!btr_pcur_is_on_user_rec(&m_pcur)) - return false; + rec_t *rec= page_rec_get_next(btr_pcur_get_rec(&m_pcur)); + if (rec_is_metadata(rec, *m_index)) /* Skip the metadata pseudo-record. */ - } - else - btr_pcur_move_to_prev_on_page(&m_pcur); + btr_pcur_get_page_cur(&m_pcur)->rec= rec; return true; } @@ -1582,55 +1577,10 @@ dberr_t IndexPurge::next() noexcept return DB_END_OF_INDEX; } - buf_block_t* block = btr_pcur_get_block(&m_pcur); - uint32_t next_page = btr_page_get_next( - block->page.frame); - - /* MDEV-13542 FIXME: Make these checks part of - btr_pcur_move_to_next_page(), and introduce a - return status that will be checked in all callers! */ - switch (next_page) { - default: - if (next_page != block->page.id().page_no()) { - break; - } - /* MDEV-20931 FIXME: Check that - next_page is within the tablespace - bounds! Also check that it is not a - change buffer bitmap page. 
*/ - /* fall through */ - case 0: - case 1: - case FIL_NULL: - return DB_CORRUPTION; + if (dberr_t err = btr_pcur_move_to_next_page(&m_pcur, + &m_mtr)) { + return err; } - - dict_index_t* index = m_pcur.btr_cur.index; - buf_block_t* next_block = btr_block_get( - *index, next_page, BTR_MODIFY_LEAF, false, - &m_mtr); - - if (UNIV_UNLIKELY(!next_block - || !fil_page_index_page_check( - next_block->page.frame) - || !!dict_index_is_spatial(index) - != (fil_page_get_type( - next_block->page.frame) - == FIL_PAGE_RTREE) - || page_is_comp(next_block->page.frame) - != page_is_comp(block->page.frame) - || btr_page_get_prev( - next_block->page.frame) - != block->page.id().page_no())) { - return DB_CORRUPTION; - } - - btr_leaf_page_release(block, BTR_MODIFY_LEAF, &m_mtr); - - page_cur_set_before_first(next_block, - &m_pcur.btr_cur.page_cur); - - ut_d(page_check_dir(next_block->page.frame)); } else { btr_pcur_move_to_next_on_page(&m_pcur); } @@ -2347,11 +2297,11 @@ row_import_set_sys_max_row_id( if (btr_pcur_open_at_index_side(false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr) == DB_SUCCESS) { - btr_pcur_move_to_prev_on_page(&pcur); - rec = btr_pcur_get_rec(&pcur); + rec = btr_pcur_move_to_prev_on_page(&pcur); - /* Check for empty table. */ - if (page_rec_is_infimum(rec)) { + if (!rec) { + /* The table is corrupted. */ + } else if (page_rec_is_infimum(rec)) { /* The table is empty. 
*/ } else if (rec_is_metadata(rec, *index)) { /* The clustered index contains the metadata diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index f01231fda16..0a8655ee4c4 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -139,7 +139,10 @@ public: if (log_sys.check_flush_or_checkpoint()) { if (mtr_started) { - btr_pcur_move_to_prev_on_page(pcur); + if (!btr_pcur_move_to_prev_on_page(pcur)) { + error = DB_CORRUPTION; + break; + } btr_pcur_store_position(pcur, scan_mtr); scan_mtr->commit(); mtr_started = false; @@ -1839,14 +1842,27 @@ row_merge_read_clustered_index( err_exit: trx->error_key_num = 0; goto func_exit; - } - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - if (rec_is_metadata(btr_pcur_get_rec(&pcur), *clust_index)) { - ut_ad(btr_pcur_is_on_user_rec(&pcur)); - /* Skip the metadata pseudo-record. */ } else { - ut_ad(!clust_index->is_instant()); - btr_pcur_move_to_prev_on_page(&pcur); + rec_t* rec = page_rec_get_next(btr_pcur_get_rec(&pcur)); + if (!rec) { +corrupted_metadata: + err = DB_CORRUPTION; + goto err_exit; + } + if (rec_get_info_bits(rec, page_rec_is_comp(rec)) + & REC_INFO_MIN_REC_FLAG) { + if (!clust_index->is_instant()) { + goto corrupted_metadata; + } + if (page_rec_is_comp(rec) + && rec_get_status(rec) != REC_STATUS_INSTANT) { + goto corrupted_metadata; + } + /* Skip the metadata pseudo-record. */ + btr_pcur_get_page_cur(&pcur)->rec = rec; + } else if (clust_index->is_instant()) { + goto corrupted_metadata; + } } /* Check if the table is supposed to be empty for our read view. @@ -1986,13 +2002,16 @@ err_exit: /* Store the cursor position on the last user record on the page. */ - btr_pcur_move_to_prev_on_page(&pcur); + if (!btr_pcur_move_to_prev_on_page(&pcur)) { + goto corrupted_index; + } /* Leaf pages must never be empty, unless this is the only page in the index tree. 
*/ - ut_ad(btr_pcur_is_on_user_rec(&pcur) - || btr_pcur_get_block( - &pcur)->page.id().page_no() - == clust_index->page); + if (!btr_pcur_is_on_user_rec(&pcur) + && btr_pcur_get_block(&pcur)->page.id() + .page_no() != clust_index->page) { + goto corrupted_index; + } btr_pcur_store_position(&pcur, &mtr); mtr.commit(); @@ -2495,8 +2514,10 @@ write_buffers: we must reread it on the next loop iteration. */ if (mtr_started) { - btr_pcur_move_to_prev_on_page( - &pcur); + if (!btr_pcur_move_to_prev_on_page(&pcur)) { + err = DB_CORRUPTION; + goto func_exit; + } btr_pcur_store_position( &pcur, &mtr); diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 47c4b87a7c9..ebd4ddce130 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -4715,6 +4715,15 @@ wait_table_again: pcur, moves_up, &mtr); if (UNIV_UNLIKELY(need_to_process)) { + if (UNIV_UNLIKELY(!btr_pcur_get_rec(pcur))) { + mtr.commit(); + trx->op_info = ""; + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + return DB_CORRUPTION; + } + if (UNIV_UNLIKELY(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT)) { /* We did a semi-consistent read, @@ -4732,7 +4741,7 @@ wait_table_again: pessimistic locking read, the record cannot be skipped. 
*/ - goto next_rec; + goto next_rec_after_check; } } else if (dtuple_get_n_fields(search_tuple) > 0) { @@ -5727,6 +5736,7 @@ next_rec: == ROW_READ_DID_SEMI_CONSISTENT)) { prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; } +next_rec_after_check: did_semi_consistent_read = false; prebuilt->new_rec_locks = 0; vrow = NULL; @@ -5752,7 +5762,6 @@ next_rec: /* No need to do store restore for R-tree */ mtr.commit(); mtr.start(); - mtr_extra_clust_savepoint = 0; } else if (mtr_extra_clust_savepoint) { /* We must release any clustered index latches if we are moving to the next non-clustered @@ -5760,9 +5769,10 @@ next_rec: order if we would access a different clustered index page right away without releasing the previous. */ mtr.rollback_to_savepoint(mtr_extra_clust_savepoint); - mtr_extra_clust_savepoint = 0; } + mtr_extra_clust_savepoint = 0; + if (moves_up) { if (UNIV_UNLIKELY(spatial_search)) { if (rtr_pcur_move_to_next( @@ -5792,6 +5802,10 @@ next_rec: if (btr_pcur_move_to_prev(pcur, &mtr)) { goto rec_loop; } + if (UNIV_UNLIKELY(!btr_pcur_get_rec(pcur))) { + err = DB_CORRUPTION; + goto normal_return; + } } not_moved: -- cgit v1.2.1 From 5efadf8d8c6076bbee73e45afe050ab62c517f3c Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Sat, 4 Jun 2022 10:16:15 +0400 Subject: MDEV-28747 Index condition pushdown may be configured incorrectly ha_innobase::build_template may initialize m_prebuilt->idx_cond even if there is no valid pushed_idx_cond_keyno. 
This potentially problematic piece of code was found while working on MDEV-27366 --- storage/innobase/handler/ha_innodb.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c8fc060ffeb..21da59d309d 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -7718,9 +7718,12 @@ ha_innobase::build_template( ulint num_v = 0; - if ((active_index != MAX_KEY - && active_index == pushed_idx_cond_keyno) - || (pushed_rowid_filter && rowid_filter_is_active)) { + if (active_index != MAX_KEY + && active_index == pushed_idx_cond_keyno) { + m_prebuilt->idx_cond = this; + goto icp; + } else if (pushed_rowid_filter && rowid_filter_is_active) { +icp: /* Push down an index condition or an end_range check. */ for (ulint i = 0; i < n_fields; i++) { const Field* field = table->field[i]; @@ -7901,9 +7904,6 @@ ha_innobase::build_template( } } } - if (active_index == pushed_idx_cond_keyno) { - m_prebuilt->idx_cond = this; - } } else { no_icp: /* No index condition pushdown */ -- cgit v1.2.1 From 4145618103253f634027d15dab3618abfd4ff227 Mon Sep 17 00:00:00 2001 From: Tingyao Nian Date: Fri, 27 May 2022 15:04:31 +0000 Subject: MDEV-22023 Update man page NAME section to say MariaDB instead of MySQL Continue the effort of a previous commit (PR#2114) which changed the man page titles from MySQL to MariaDB, to further update the man pages. Update the man page NAME sections to use mariadb-* instead of mysql* for MariaDB binaries that are drop-in replacements for MySQL equivalents, indicating that the commands are actually of the MariaDB version. Before: NAME mysql_upgrade - check tables for MariaDB upgrade ... After: NAME mariadb-upgrade - check tables for MariaDB upgrade (mysql_upgrade is now a symlink to mariadb-upgrade) ...
All new code of the whole pull request, including one or several files that are either new files or modified ones, are contributed under the BSD-new license. I am contributing on behalf of my employer Amazon Web Services, Inc. --- man/mysql.1 | 2 +- man/mysql_client_test.1 | 4 ++-- man/mysql_convert_table_format.1 | 2 +- man/mysql_find_rows.1 | 2 +- man/mysql_fix_extensions.1 | 2 +- man/mysql_install_db.1 | 2 +- man/mysql_ldb.1 | 2 +- man/mysql_plugin.1 | 2 +- man/mysql_secure_installation.1 | 2 +- man/mysql_setpermission.1 | 2 +- man/mysql_tzinfo_to_sql.1 | 2 +- man/mysql_upgrade.1 | 2 +- man/mysql_waitpid.1 | 2 +- man/mysqlaccess.1 | 2 +- man/mysqladmin.1 | 2 +- man/mysqlbinlog.1 | 2 +- man/mysqlcheck.1 | 2 +- man/mysqld.8 | 2 +- man/mysqld_multi.1 | 4 ++-- man/mysqld_safe.1 | 4 ++-- man/mysqld_safe_helper.1 | 4 ++-- man/mysqldump.1 | 2 +- man/mysqldumpslow.1 | 2 +- man/mysqlhotcopy.1 | 2 +- man/mysqlimport.1 | 2 +- man/mysqlshow.1 | 2 +- man/mysqlslap.1 | 2 +- man/mysqltest.1 | 2 +- 28 files changed, 32 insertions(+), 32 deletions(-) diff --git a/man/mysql.1 b/man/mysql.1 index a64051b0f77..1004e565c37 100644 --- a/man/mysql.1 +++ b/man/mysql.1 @@ -18,7 +18,7 @@ .\" SQL scripts .\" batch SQL files .SH "NAME" -mysql \- the MariaDB command\-line tool +mariadb \- the MariaDB command\-line tool (mysql is now a symlink to mariadb) .SH "SYNOPSIS" .HP \w'\fBmysql\ [\fR\fB\fIoptions\fR\fR\fB]\ \fR\fB\fIdb_name\fR\fR\ 'u \fBmysql [\fR\fB\fIoptions\fR\fR\fB] \fR\fB\fIdb_name\fR\fR diff --git a/man/mysql_client_test.1 b/man/mysql_client_test.1 index 768217cada7..4d0ca0e75a2 100644 --- a/man/mysql_client_test.1 +++ b/man/mysql_client_test.1 @@ -14,9 +14,9 @@ .\" mysql_client_test .\" mysql_client_test_embedded .SH "NAME" -mysql_client_test \- test client API +mariadb-client-test \- test client API (mysql_client_test is now a symlink to mariadb-client-test) .br -mysql_client_test_embedded \- test client API for embedded server +mariadb-client-test-embedded \- test client API 
for embedded server (mysql_client_test_embedded is now a symlink to mariadb-client-test-embedded) .SH "SYNOPSIS" .HP \w'\fBmysql_client_test\ [\fR\fB\fIoptions\fR\fR\fB]\ [\fR\fB\fItest_name\fR\fR\fB]\ \&.\&.\&.\fR\ 'u \fBmysql_client_test [\fR\fB\fIoptions\fR\fR\fB] [\fR\fB\fItest_name\fR\fR\fB] \&.\&.\&.\fR diff --git a/man/mysql_convert_table_format.1 b/man/mysql_convert_table_format.1 index 0cffaf0dd2c..2f41434a1a6 100644 --- a/man/mysql_convert_table_format.1 +++ b/man/mysql_convert_table_format.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysql_convert_table_format .SH "NAME" -mysql_convert_table_format \- convert tables to use a given storage engine +mariadb-convert-table-format \- convert tables to use a given storage engine (mysql_convert_table_format is now a symlink to mariadb-convert-table-format) .SH "SYNOPSIS" .HP \w'\fBmysql_convert_table_format\ [\fR\fB\fIoptions\fR\fR\fB]\ \fR\fB\fIdb_name\fR\fR\ 'u \fBmysql_convert_table_format [\fR\fB\fIoptions\fR\fR\fB] \fR\fB\fIdb_name\fR\fR diff --git a/man/mysql_find_rows.1 b/man/mysql_find_rows.1 index 7dfe0509ed4..b2106b77a0f 100644 --- a/man/mysql_find_rows.1 +++ b/man/mysql_find_rows.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysql_find_rows .SH "NAME" -mysql_find_rows \- extract SQL statements from files +mariadb-find-rows \- extract SQL statements from files (mysql_find_rows is now a symlink to mariadb-find-rows) .SH "SYNOPSIS" .HP \w'\fBmysql_find_rows\ [\fR\fB\fIoptions\fR\fR\fB]\ [\fR\fB\fIfile_name\fR\fR\fB\ \&.\&.\&.]\fR\ 'u \fBmysql_find_rows [\fR\fB\fIoptions\fR\fR\fB] [\fR\fB\fIfile_name\fR\fR\fB \&.\&.\&.]\fR diff --git a/man/mysql_fix_extensions.1 b/man/mysql_fix_extensions.1 index 85406576f5d..4fe7c58073a 100644 --- a/man/mysql_fix_extensions.1 +++ b/man/mysql_fix_extensions.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysql_fix_extensions .SH 
"NAME" -mysql_fix_extensions \- normalize table file name extensions +mariadb-fix-extensions \- normalize table file name extensions (mysql_fix_extensions is now a symlink to mariadb-fix-extensions) .SH "SYNOPSIS" .HP \w'\fBmysql_fix_extensions\ \fR\fB\fIdata_dir\fR\fR\ 'u \fBmysql_fix_extensions \fR\fB\fIdata_dir\fR\fR diff --git a/man/mysql_install_db.1 b/man/mysql_install_db.1 index 632a2e204f0..765fc7c3677 100644 --- a/man/mysql_install_db.1 +++ b/man/mysql_install_db.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysql_install_db .SH "NAME" -mysql_install_db \- initialize MariaDB data directory +mariadb-install-db \- initialize MariaDB data directory (mysql_install_db is now a symlink to mariadb-install-db) .SH "SYNOPSIS" .HP \w'\fBmysql_install_db\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u \fBmysql_install_db [\fR\fB\fIoptions\fR\fR\fB]\fR diff --git a/man/mysql_ldb.1 b/man/mysql_ldb.1 index f8859e259c7..f6e0de32e7e 100644 --- a/man/mysql_ldb.1 +++ b/man/mysql_ldb.1 @@ -9,7 +9,7 @@ .\" disable justification (adjust text to left margin only) .ad l .SH NAME -mysql_ldb \- RocksDB tool +mariadb-ldb \- RocksDB tool (mysql_ldb is now a symlink to mariadb-ldb) .SH DESCRIPTION Use \fBmysql_ldb \-\-help\fR for details on usage\. 
.PP diff --git a/man/mysql_plugin.1 b/man/mysql_plugin.1 index 5001e542c38..2365ac9b751 100644 --- a/man/mysql_plugin.1 +++ b/man/mysql_plugin.1 @@ -22,7 +22,7 @@ .\" ----------------------------------------------------------------- .\" mysql_plugin .SH "NAME" -mysql_plugin \- configure MariaDB server plugins +mariadb-plugin \- configure MariaDB server plugins (mysql_plugin is now a symlink to mariadb-plugin) .SH "SYNOPSIS" .HP \w'\fBmysql_plugin\ [\fR\fB\fIoptions\fR\fR\fB]\ \fR\fB\fIplugin\fR\fR\fB\ {ENABLE|DISABLE}\fR\ 'u \fBmysql_plugin [\fR\fB\fIoptions\fR\fR\fB] \fR\fB\fIplugin\fR\fR\fB {ENABLE|DISABLE}\fR diff --git a/man/mysql_secure_installation.1 b/man/mysql_secure_installation.1 index fc07f0d98cf..126a59d51ea 100644 --- a/man/mysql_secure_installation.1 +++ b/man/mysql_secure_installation.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysql_secure_installation .SH "NAME" -mysql_secure_installation \- improve MariaDB installation security +mariadb-secure-installation \- improve MariaDB installation security (mysql_secure_installation is now a symlink to mariadb-secure-installation) .SH "SYNOPSIS" .HP \w'\fBmysql_secure_installation\fR\ 'u \fBmysql_secure_installation\fR diff --git a/man/mysql_setpermission.1 b/man/mysql_setpermission.1 index 58cf1927a35..fa9b8ad425b 100644 --- a/man/mysql_setpermission.1 +++ b/man/mysql_setpermission.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysql_setpermission .SH "NAME" -mysql_setpermission \- interactively set permissions in grant tables +mariadb-setpermission \- interactively set permissions in grant tables (mysql_setpermission is now a symlink to mariadb-setpermission) .SH "SYNOPSIS" .HP \w'\fBmysql_setpermission\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u \fBmysql_setpermission [\fR\fB\fIoptions\fR\fR\fB]\fR diff --git a/man/mysql_tzinfo_to_sql.1 b/man/mysql_tzinfo_to_sql.1 index 732fbe4f868..c830bcf3f9e 100644 --- 
a/man/mysql_tzinfo_to_sql.1 +++ b/man/mysql_tzinfo_to_sql.1 @@ -14,7 +14,7 @@ .\" mysql_tzinfo_to_sql .\" time zone tables .SH "NAME" -mysql_tzinfo_to_sql \- load the time zone tables +mariadb-tzinfo-to-sql \- load the time zone tables (mysql_tzinfo_to_sql is now a symlink to mariadb-tzinfo-to-sql) .SH "SYNOPSIS" .HP \w'\fBmysql_tzinfo_to_sql\ \fR\fB\fIarguments\fR\fR\ 'u \fBmysql_tzinfo_to_sql \fR\fB\fIarguments\fR\fR diff --git a/man/mysql_upgrade.1 b/man/mysql_upgrade.1 index fa46d792505..f6beca90c80 100644 --- a/man/mysql_upgrade.1 +++ b/man/mysql_upgrade.1 @@ -15,7 +15,7 @@ .\" upgrading MySQL .\" MySQL: upgrading .SH "NAME" -mysql_upgrade \- check tables for MariaDB upgrade +mariadb-upgrade \- check tables for MariaDB upgrade (mysql_upgrade is now a symlink to mariadb-upgrade) .SH "SYNOPSIS" .HP \w'\fBmysql_upgrade\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u \fBmysql_upgrade [\fR\fB\fIoptions\fR\fR\fB]\fR diff --git a/man/mysql_waitpid.1 b/man/mysql_waitpid.1 index 5fb4c403b1c..32c1034dfb0 100644 --- a/man/mysql_waitpid.1 +++ b/man/mysql_waitpid.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysql_waitpid .SH "NAME" -mysql_waitpid \- kill process and wait for its termination +mariadb-waitpid \- kill process and wait for its termination (mysql_waitpid is now a symlink to mariadb-waitpid) .SH "SYNOPSIS" .HP \w'\fBmysql_waitpid\ [\fR\fB\fIoptions\fR\fR\fB]\ \fR\fB\fIpid\fR\fR\fB\ \fR\fB\fIwait_time\fR\fR\ 'u \fBmysql_waitpid [\fR\fB\fIoptions\fR\fR\fB] \fR\fB\fIpid\fR\fR\fB \fR\fB\fIwait_time\fR\fR diff --git a/man/mysqlaccess.1 b/man/mysqlaccess.1 index 0f0a5baea39..9ddea58ecd7 100644 --- a/man/mysqlaccess.1 +++ b/man/mysqlaccess.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysqlaccess .SH "NAME" -mysqlaccess \- client for checking access privileges +mariadb-access \- client for checking access privileges (mysqlaccess is now a symlink to mariadb-access) .SH "SYNOPSIS" .HP 
\w'\fBmysqlaccess\ [\fR\fB\fIhost_name\fR\fR\fB\ [\fR\fB\fIuser_name\fR\fR\fB\ [\fR\fB\fIdb_name\fR\fR\fB]]]\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u \fBmysqlaccess [\fR\fB\fIhost_name\fR\fR\fB [\fR\fB\fIuser_name\fR\fR\fB [\fR\fB\fIdb_name\fR\fR\fB]]] [\fR\fB\fIoptions\fR\fR\fB]\fR diff --git a/man/mysqladmin.1 b/man/mysqladmin.1 index fb458af1d47..02cf8b275a3 100644 --- a/man/mysqladmin.1 +++ b/man/mysqladmin.1 @@ -15,7 +15,7 @@ .\" administration: server .\" server administration .SH "NAME" -mysqladmin \- client for administering a MariaDB server +mariadb-admin \- client for administering a MariaDB server (mysqladmin is now a symlink to mariadb-admin) .SH "SYNOPSIS" .HP \w'\fBmysqladmin\ [\fR\fB\fIoptions\fR\fR\fB]\ \fR\fB\fIcommand\fR\fR\fB\ [\fR\fB\fIcommand\-arg\fR\fR\fB]\ [\fR\fB\fIcommand\fR\fR\fB\ [\fR\fB\fIcommand\-arg\fR\fR\fB]]\ \&.\&.\&.\fR\ 'u \fBmysqladmin [\fR\fB\fIoptions\fR\fR\fB] \fR\fB\fIcommand\fR\fR\fB [\fR\fB\fIcommand\-arg\fR\fR\fB] [\fR\fB\fIcommand\fR\fR\fB [\fR\fB\fIcommand\-arg\fR\fR\fB]] \&.\&.\&.\fR diff --git a/man/mysqlbinlog.1 b/man/mysqlbinlog.1 index cd6d1616b4d..989749548b0 100644 --- a/man/mysqlbinlog.1 +++ b/man/mysqlbinlog.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysqlbinlog .SH "NAME" -mysqlbinlog \- utility for processing binary log files +mariadb-binlog \- utility for processing binary log files (mysqlbinlog is now a symlink to mariadb-binlog) .SH "SYNOPSIS" .HP \w'\fBmysqlbinlog\ [\fR\fBoptions\fR\fB]\ \fR\fB\fIlog_file\fR\fR\fB\ \&.\&.\&.\fR\ 'u \fBmysqlbinlog [\fR\fBoptions\fR\fB] \fR\fB\fIlog_file\fR\fR\fB \&.\&.\&.\fR diff --git a/man/mysqlcheck.1 b/man/mysqlcheck.1 index 39d4f119563..2119e632a4b 100644 --- a/man/mysqlcheck.1 +++ b/man/mysqlcheck.1 @@ -17,7 +17,7 @@ .\" tables: maintenance .\" tables: repair .SH "NAME" -mysqlcheck \- a table maintenance program +mariadb-check \- a table maintenance program (mysqlcheck is now a symlink to mariadb-check) .SH "SYNOPSIS" .HP 
\w'\fBmysqlcheck\ [\fR\fB\fIoptions\fR\fR\fB]\ [\fR\fB\fIdb_name\fR\fR\fB\ [\fR\fB\fItbl_name\fR\fR\fB\ \&.\&.\&.]]\fR\ 'u \fBmysqlcheck [\fR\fB\fIoptions\fR\fR\fB] [\fR\fB\fIdb_name\fR\fR\fB [\fR\fB\fItbl_name\fR\fR\fB \&.\&.\&.]]\fR diff --git a/man/mysqld.8 b/man/mysqld.8 index 6889e042e98..ec63e33ff34 100644 --- a/man/mysqld.8 +++ b/man/mysqld.8 @@ -14,7 +14,7 @@ .\" mysqld: MariaDB server .\" MariaDB server: mysqld .SH "NAME" -mysqld \- the MariaDB server +mariadbd \- the MariaDB server (mysqld is now a symlink to mariadbd) .SH "SYNOPSIS" .HP \w'\fBmysqld\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u \fBmysqld [\fR\fB\fIoptions\fR\fR\fB]\fR diff --git a/man/mysqld_multi.1 b/man/mysqld_multi.1 index 4b63dd7caf8..1504b36bc5f 100644 --- a/man/mysqld_multi.1 +++ b/man/mysqld_multi.1 @@ -1,6 +1,6 @@ '\" t .\" -.TH "\FBMARIADB-MULTI\FR" "1" "27 June 2019" "MariaDB 10\&.5" "MariaDB Database System" +.TH "\FBMARIADBD-MULTI\FR" "1" "27 June 2019" "MariaDB 10\&.5" "MariaDB Database System" .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- @@ -16,7 +16,7 @@ .\" scripts .\" multi mysqld .SH "NAME" -mysqld_multi \- manage multiple MariaDB servers +mariadbd-multi \- manage multiple MariaDB servers (mysqld_multi is now a symlink to mariadbd-multi) .SH "SYNOPSIS" .HP \w'\fBmysqld_multi\ [\fR\fB\fIoptions\fR\fR\fB]\ {start|stop|report}\ [\fR\fB\fIGNR\fR\fR\fB[,\fR\fB\fIGNR\fR\fR\fB]\ \&.\&.\&.]\fR\ 'u \fBmysqld_multi [\fR\fB\fIoptions\fR\fR\fB] {start|stop|report} [\fR\fB\fIGNR\fR\fR\fB[,\fR\fB\fIGNR\fR\fR\fB] \&.\&.\&.]\fR diff --git a/man/mysqld_safe.1 b/man/mysqld_safe.1 index 80b8274bf67..a995d61dca1 100644 --- a/man/mysqld_safe.1 +++ b/man/mysqld_safe.1 @@ -1,6 +1,6 @@ '\" t .\" -.TH "\FBMARIADB-SAFE\FR" "1" "27 June 2019" "MariaDB 10\&.5" "MariaDB Database System" +.TH "\FBMARIADBD-SAFE\FR" "1" "27 June 2019" "MariaDB 10\&.5" "MariaDB Database System" .\" 
----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- @@ -15,7 +15,7 @@ .\" tools: mysqld_safe .\" scripts .SH "NAME" -mysqld_safe \- MariaDB server startup script +mariadbd-safe \- MariaDB server startup script (mysqld_safe is now a symlink to mariadbd-safe) .SH "SYNOPSIS" .HP \w'\fBmysqld_safe\ \fR\fB\fIoptions\fR\fR\ 'u \fBmysqld_safe \fR\fB\fIoptions\fR\fR diff --git a/man/mysqld_safe_helper.1 b/man/mysqld_safe_helper.1 index fd45258cb68..8c33f87bcee 100644 --- a/man/mysqld_safe_helper.1 +++ b/man/mysqld_safe_helper.1 @@ -1,6 +1,6 @@ '\" t .\" -.TH "\FBMARIADB-SAFE-HELPER\FR" "1" "27 June 2019" "MariaDB 10\&.5" "MariaDB Database System" +.TH "\FBMARIADBD-SAFE-HELPER\FR" "1" "27 June 2019" "MariaDB 10\&.5" "MariaDB Database System" .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- @@ -9,7 +9,7 @@ .\" disable justification (adjust text to left margin only) .ad l .SH NAME -mysqld_safe_helper \- helper script +mariadbd-safe-helper \- helper script (mysqld_safe_helper is now a symlink to mariadbd-safe-helper) .SH DESCRIPTION Use: Helper script\. 
.PP diff --git a/man/mysqldump.1 b/man/mysqldump.1 index 705d88bbd20..edf60b977f6 100644 --- a/man/mysqldump.1 +++ b/man/mysqldump.1 @@ -17,7 +17,7 @@ .\" databases: dumping .\" tables: dumping .SH "NAME" -mysqldump \- a database backup program +mariadb-dump \- a database backup program (mysqldump is now a symlink to mariadb-dump) .SH "SYNOPSIS" .HP \w'\fBmysqldump\ [\fR\fB\fIoptions\fR\fR\fB]\ [\fR\fB\fIdb_name\fR\fR\fB\ [\fR\fB\fItbl_name\fR\fR\fB\ \&.\&.\&.]]\fR\ 'u \fBmysqldump [\fR\fB\fIoptions\fR\fR\fB] [\fR\fB\fIdb_name\fR\fR\fB [\fR\fB\fItbl_name\fR\fR\fB \&.\&.\&.]]\fR diff --git a/man/mysqldumpslow.1 b/man/mysqldumpslow.1 index b1d03ed474d..e4c0b940cef 100644 --- a/man/mysqldumpslow.1 +++ b/man/mysqldumpslow.1 @@ -13,7 +13,7 @@ .\" ----------------------------------------------------------------- .\" mysqldumpslow .SH "NAME" -mysqldumpslow \- Summarize slow query log files +mariadb-dumpslow \- Summarize slow query log files (mysqldumpslow is now a symlink to mariadb-dumpslow) .SH "SYNOPSIS" .HP \w'\fBmysqldumpslow\ [\fR\fBoptions\fR\fB]\ [\fR\fB\fIlog_file\fR\fR\fB\ \&.\&.\&.]\fR\ 'u \fBmysqldumpslow [\fR\fBoptions\fR\fB] [\fR\fB\fIlog_file\fR\fR\fB \&.\&.\&.]\fR diff --git a/man/mysqlhotcopy.1 b/man/mysqlhotcopy.1 index 224896f7347..579cbff6c19 100644 --- a/man/mysqlhotcopy.1 +++ b/man/mysqlhotcopy.1 @@ -17,7 +17,7 @@ .\" databases: dumping .\" tables: dumping .SH "NAME" -mysqlhotcopy \- a database backup program +mariadb-hotcopy \- a database backup program (mysqlhotcopy is now a symlink to mariadb-hotcopy) .SH "SYNOPSIS" .HP \w'\fBmysqlhotcopy\ \fR\fB\fIarguments\fR\fR\ 'u \fBmysqlhotcopy \fR\fB\fIarguments\fR\fR diff --git a/man/mysqlimport.1 b/man/mysqlimport.1 index 7916da98a6e..76ee4bcf28d 100644 --- a/man/mysqlimport.1 +++ b/man/mysqlimport.1 @@ -17,7 +17,7 @@ .\" files: text .\" text files: importing .SH "NAME" -mysqlimport \- a data import program +mariadb-import \- a data import program (mysqlimport is now a symlink to mariadb-import) .SH 
"SYNOPSIS" .HP \w'\fBmysqlimport\ [\fR\fB\fIoptions\fR\fR\fB]\ \fR\fB\fIdb_name\fR\fR\fB\ \fR\fB\fItextfile1\fR\fR\fB\ \&.\&.\&.\fR\ 'u \fBmysqlimport [\fR\fB\fIoptions\fR\fR\fB] \fR\fB\fIdb_name\fR\fR\fB \fR\fB\fItextfile1\fR\fR\fB \&.\&.\&.\fR diff --git a/man/mysqlshow.1 b/man/mysqlshow.1 index 974f1249a07..38192538654 100644 --- a/man/mysqlshow.1 +++ b/man/mysqlshow.1 @@ -18,7 +18,7 @@ .\" columns: displaying .\" showing: database information .SH "NAME" -mysqlshow \- display database, table, and column information +mariadb-show \- display database, table, and column information (mysqlshow is now a symlink to mariadb-show) .SH "SYNOPSIS" .HP \w'\fBmysqlshow\ [\fR\fB\fIoptions\fR\fR\fB]\ [\fR\fB\fIdb_name\fR\fR\fB\ [\fR\fB\fItbl_name\fR\fR\fB\ [\fR\fB\fIcol_name\fR\fR\fB]]]\fR\ 'u \fBmysqlshow [\fR\fB\fIoptions\fR\fR\fB] [\fR\fB\fIdb_name\fR\fR\fB [\fR\fB\fItbl_name\fR\fR\fB [\fR\fB\fIcol_name\fR\fR\fB]]]\fR diff --git a/man/mysqlslap.1 b/man/mysqlslap.1 index dceffba418f..5d8a7779b8e 100644 --- a/man/mysqlslap.1 +++ b/man/mysqlslap.1 @@ -14,7 +14,7 @@ .\" mysqlslap .\" load emulation .SH "NAME" -mysqlslap \- load emulation client +mariadb-slap \- load emulation client (mysqlslap is now a symlink to mariadb-slap) .SH "SYNOPSIS" .HP \w'\fBmysqlslap\ [\fR\fB\fIoptions\fR\fR\fB]\fR\ 'u \fBmysqlslap [\fR\fB\fIoptions\fR\fR\fB]\fR diff --git a/man/mysqltest.1 b/man/mysqltest.1 index 8586d50926e..502286dd50b 100644 --- a/man/mysqltest.1 +++ b/man/mysqltest.1 @@ -14,7 +14,7 @@ .\" mysqltest .\" mysqltest_embedded .SH "NAME" -mysqltest \- program to run test cases +mariadb-test \- program to run test cases (mysqltest is now a symlink to mariadb-test) .br mysqltest_embedded \- program to run embedded test cases .SH "SYNOPSIS" -- cgit v1.2.1 From 9c207c88c1411d1e538e8412182311de63757771 Mon Sep 17 00:00:00 2001 From: GuiXiaoDi Date: Fri, 3 Jun 2022 15:08:46 +0800 Subject: mysql.server.sh fix for non-Red Hat platforms The else condition is meant to be here to define the 
functions if the Red Hat include file isn't there. Fixes: commit 467011bcac3b3f42ae6f21dde8d88e78708b21d1 / MDEV-26614 RedHat -> Red Hat by Daniel Black --- support-files/mysql.server.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh index 7f034601539..54bd9463f61 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -9,7 +9,7 @@ # When this is done the mysql server will be started when the machine is # started and shut down when the systems goes down. -# Comments to support chkconfig on RedHat Linux +# Comments to support chkconfig on Red Hat Linux # chkconfig: 2345 64 36 # description: A very fast and reliable SQL database engine. @@ -52,7 +52,7 @@ datadir= # Negative numbers mean to wait indefinitely service_startup_timeout=900 -# Lock directory for RedHat / SuSE. +# Lock directory for Red Hat / SuSE. lockdir='/var/lock/subsys' lock_file_path="$lockdir/mysql" @@ -91,7 +91,7 @@ datadir_set= # # Use LSB init script functions for printing messages, if possible -# Include non-LSB RedHat init functions to make systemctl redirect work +# Include non-LSB Red Hat init functions to make systemctl redirect work init_functions="/etc/init.d/functions" lsb_functions="/lib/lsb/init-functions" if test -f $lsb_functions; then @@ -100,6 +100,7 @@ fi if test -f $init_functions; then . $init_functions +else log_success_msg() { echo " SUCCESS! $@" @@ -309,7 +310,7 @@ case "$mode" in $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" & wait_for_ready; return_value=$? 
- # Make lock for RedHat / SuSE + # Make lock for Red Hat / SuSE if test -w "$lockdir" then touch "$lock_file_path" @@ -339,7 +340,7 @@ case "$mode" in rm "$mysqld_pid_file_path" fi - # Delete lock for RedHat / SuSE + # Delete lock for Red Hat / SuSE if test -f "$lock_file_path" then rm -f "$lock_file_path" -- cgit v1.2.1 From 2cd1edfc2176649cda412b2ff4f1075ad23448ba Mon Sep 17 00:00:00 2001 From: Daniel Lewart Date: Thu, 9 Jun 2022 13:20:37 +1000 Subject: MDEV-25577 mariadb-tzinfo-to-sql generates superfluous warnings The zoneinfo directory is littered with non-timezone information files. These frequently contain extensions, not present in real timezone files. Also leapseconds is frequently there and is not a timezone file. --- sql/tztime.cc | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/sql/tztime.cc b/sql/tztime.cc index dfd0301037f..ec9e68ee66d 100644 --- a/sql/tztime.cc +++ b/sql/tztime.cc @@ -2470,15 +2470,6 @@ MEM_ROOT tz_storage; char fullname[FN_REFLEN + 1]; char *root_name_end; -/* - known file types that exist in the zoneinfo directory that are safe to - silently skip -*/ -const char *known_extensions[]= { - ".tab", - NullS -}; - /* Recursively scan zoneinfo directory and print all found time zone @@ -2575,20 +2566,19 @@ scan_tz_dir(char * name_end, uint symlink_recursion_level, uint verbose) else { /* - Some systems (like debian, opensuse etc) have description - files (.tab). We skip these silently if verbose is > 0 + Some systems (like Debian, openSUSE, etc) have non-timezone files: + * iso3166.tab + * leap-seconds.list + * leapseconds + * tzdata.zi + * zone.tab + * zone1970.tab + We skip these silently unless verbose > 0.
*/ const char *current_ext= fn_ext(fullname); - my_bool known_ext= 0; + my_bool known_ext= strlen(current_ext) || + !strcmp(my_basename(fullname), "leapseconds"); - for (const char **ext= known_extensions ; *ext ; ext++) - { - if (!strcmp(*ext, current_ext)) - { - known_ext= 1; - break; - } - } if (verbose > 0 || !known_ext) { fflush(stdout); -- cgit v1.2.1 From 98293130c34cfd39bf2e8c904923abe3942d23be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 9 Jun 2022 10:57:28 +0300 Subject: MDEV-28779: ALTER TABLE IMPORT TABLESPACE corrupts an encrypted table PageConverter::update_header(): Remove an unnecessary write. The field that was originally called FIL_PAGE_FILE_FLUSH_LSN only made sense for the first page of the system tablespace (initially, for the first page of each file of the system tablespace). It never had any meaning for .ibd files, and it lost its original meaning in MariaDB Server 10.8.1 when commit b07920b634f455c39e3650c6163bec2a8ce0ffe0 (MDEV-27199) removed the ability to start without ib_logfile0. If the most significant 32 bits of the LSN are nonzero, this unnecessary write would write the wrong encryption key identifier to the page. The first page of any file is never encrypted, so normally those bytes should be 0 for any .ibd file. 
--- mysql-test/suite/mariabackup/huge_lsn.result | 10 +++++++++- mysql-test/suite/mariabackup/huge_lsn.test | 18 +++++++++++++++++- storage/innobase/row/row0import.cc | 6 +----- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/mysql-test/suite/mariabackup/huge_lsn.result b/mysql-test/suite/mariabackup/huge_lsn.result index 61ca3400721..2225a958756 100644 --- a/mysql-test/suite/mariabackup/huge_lsn.result +++ b/mysql-test/suite/mariabackup/huge_lsn.result @@ -2,7 +2,7 @@ # MDEV-13416 mariabackup fails with EFAULT "Bad Address" # FOUND 1 /InnoDB: New log files created, LSN=175964\d{8}/ in mysqld.1.err -CREATE TABLE t(i INT) ENGINE INNODB; +CREATE TABLE t(i INT) ENGINE=INNODB ENCRYPTED=YES; INSERT INTO t VALUES(1); # xtrabackup backup SET GLOBAL innodb_flush_log_at_trx_commit=1; @@ -15,6 +15,14 @@ INSERT INTO t VALUES(2); SELECT * FROM t; i 1 +FLUSH TABLE t FOR EXPORT; +UNLOCK TABLES; +ALTER TABLE t DISCARD TABLESPACE; +ALTER TABLE t IMPORT TABLESPACE; +FLUSH TABLE t FOR EXPORT; +UNLOCK TABLES; +ALTER TABLE t DISCARD TABLESPACE; +ALTER TABLE t IMPORT TABLESPACE; DROP TABLE t; # shutdown server # remove datadir diff --git a/mysql-test/suite/mariabackup/huge_lsn.test b/mysql-test/suite/mariabackup/huge_lsn.test index 3349ef40df8..55978071be3 100644 --- a/mysql-test/suite/mariabackup/huge_lsn.test +++ b/mysql-test/suite/mariabackup/huge_lsn.test @@ -40,7 +40,7 @@ let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; --let SEARCH_PATTERN= InnoDB: New log files created, LSN=175964\d{8} --source include/search_pattern_in_file.inc -CREATE TABLE t(i INT) ENGINE INNODB; +CREATE TABLE t(i INT) ENGINE=INNODB ENCRYPTED=YES; INSERT INTO t VALUES(1); echo # xtrabackup backup; @@ -56,6 +56,22 @@ exec $XTRABACKUP --prepare --target-dir=$targetdir; --source include/restart_and_restore.inc --enable_result_log SELECT * FROM t; +FLUSH TABLE t FOR EXPORT; +copy_file $_datadir/test/t.ibd $_datadir/test/t_copy.ibd; +copy_file $_datadir/test/t.cfg $_datadir/test/t_copy.cfg; 
+UNLOCK TABLES; +ALTER TABLE t DISCARD TABLESPACE; +move_file $_datadir/test/t_copy.ibd $_datadir/test/t.ibd; +move_file $_datadir/test/t_copy.cfg $_datadir/test/t.cfg; +ALTER TABLE t IMPORT TABLESPACE; +FLUSH TABLE t FOR EXPORT; +copy_file $_datadir/test/t.ibd $_datadir/test/t_copy.ibd; +copy_file $_datadir/test/t.cfg $_datadir/test/t_copy.cfg; +UNLOCK TABLES; +ALTER TABLE t DISCARD TABLESPACE; +move_file $_datadir/test/t_copy.ibd $_datadir/test/t.ibd; +move_file $_datadir/test/t_copy.cfg $_datadir/test/t.cfg; +ALTER TABLE t IMPORT TABLESPACE; DROP TABLE t; rmdir $targetdir; let $targetdir= $targetdir_old; diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 885a43bb2f8..db4c22b4969 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2021, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1996,10 +1996,6 @@ PageConverter::update_header( ib::warn() << "Space id check in the header failed: ignored"; } - mach_write_to_8( - get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, - m_current_lsn); - /* Write back the adjusted flags. */ mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + get_frame(block), m_space_flags); -- cgit v1.2.1