diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2018-03-23 17:44:57 +0530 |
---|---|---|
committer | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2018-03-23 17:44:57 +0530 |
commit | 052d8c8ed9916b3c6d567603ad98ced089e2e3f5 (patch) | |
tree | 430a666041470113fd13b8c3f9c46a066ad319fb | |
parent | 9472f0f4a46adf3fc7f1a5018b5b1aadd40d30be (diff) | |
download | mariadb-git-bb-10.3-MDEV-15325.tar.gz |
MDEV-15325 Incomplete validation of missing tablespace during recovery [bb-10.3-MDEV-15325]
Problem:
=======
During validation of a missing tablespace, the missing tablespace id is
compared against the hash table of redo log records (recv_sys->addr_hash).
But if the hash table ran out of memory, it may not contain the redo log
records of all tablespaces. In that case, the server will load InnoDB
even though there is a missing tablespace.
Solution:
========
If the recv_sys->addr_hash hash table ran out of memory, InnoDB needs
to scan the remaining redo log again to validate the missing tablespaces.
-rw-r--r-- | mysql-test/suite/innodb/r/innodb-index.result | 14 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/innodb-index.test | 48 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 250 |
3 files changed, 224 insertions, 88 deletions
diff --git a/mysql-test/suite/innodb/r/innodb-index.result b/mysql-test/suite/innodb/r/innodb-index.result index 54ad4e8a927..519b289fd4b 100644 --- a/mysql-test/suite/innodb/r/innodb-index.result +++ b/mysql-test/suite/innodb/r/innodb-index.result @@ -1848,3 +1848,17 @@ create table t1(o1 int, o2 int, o3 int, primary key(o1,o2,o3)) engine = innodb; insert into t1 values(1,1,2),(2,2,1); alter table t1 drop primary key, add primary key(o1), lock=none; drop table t1; +CREATE TABLE t1(f1 INT PRIMARY KEY)ENGINE=InnoDB; +CREATE TABLE t2(f1 INT PRIMARY KEY)ENGINE=InnoDB; +# Kill the server +# Wrong space_id in a dirty file and a missing file +SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +# Restore t1 and t2 +SELECT * FROM t1; +f1 +SELECT * FROM t2; +f1 +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/t/innodb-index.test b/mysql-test/suite/innodb/t/innodb-index.test index 721808c038c..802bcccb9f4 100644 --- a/mysql-test/suite/innodb/t/innodb-index.test +++ b/mysql-test/suite/innodb/t/innodb-index.test @@ -1074,5 +1074,53 @@ insert into t1 values('abd', 'acd'), ('acd', 'abd'); alter table t1 drop primary key, add primary key(o2,o1), lock=none; drop table t1; + # no skip sort cases --source suite/innodb/include/alter_table_pk_no_sort.inc + +--source include/no_checkpoint_start.inc +CREATE TABLE t1(f1 INT PRIMARY KEY)ENGINE=InnoDB; + +CREATE TABLE t2(f1 INT PRIMARY KEY)ENGINE=InnoDB; + +--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1, t2; +--source include/no_checkpoint_end.inc + +let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err; +let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); + +--echo # Wrong space_id in a dirty file and a missing file + +--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t0.ibd +--move_file $MYSQLD_DATADIR/test/t2.ibd 
$MYSQLD_DATADIR/test/t1.ibd + +--source include/start_mysqld.inc +--eval $check_no_innodb +--source include/shutdown_mysqld.inc + +--echo # Restore t1 and t2 + +--move_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t2.ibd +--move_file $MYSQLD_DATADIR/test/t0.ibd $MYSQLD_DATADIR/test/t1.ibd + +--source include/start_mysqld.inc + +SELECT * FROM t1; +SELECT * FROM t2; + +DROP TABLE t1, t2; + +--disable_query_log + +call mtr.add_suppression("InnoDB: Tablespace .* was not found at .*t[12].ibd."); +call mtr.add_suppression("InnoDB: Set innodb_force_recovery=1 to ignore this and to permanently lose all changes to the tablespace"); +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); + +--enable_query_log + +--let $restart_parameters= +--source include/restart_mysqld.inc diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 5be52ec0696..e28bd6bff8a 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -158,6 +158,14 @@ typedef std::map< static recv_spaces_t recv_spaces; +typedef std::set<ulint> space_set_t; + +/* list of missing tablespace id. */ +space_set_t missing_spaces; + +/* Lastly added LSN to the hash table of log records. */ +static lsn_t last_stored_lsn; + /** Process a file name from a MLOG_FILE_* record. @param[in,out] name file name @param[in] len length of the file name @@ -2681,26 +2689,30 @@ recv_sys_justify_left_parsing_buf(void) /** Scan redo log from a buffer and stores new log data to the parsing buffer. Parse and hash the log records if new data found. Apply log records automatically when the hash table becomes full. +@param[in] available_memory Maximum size for hash table of recs +@param[in,out] store_to_hash whether the records should be + stored to the hash table. 
Resets + the value when available_memory + runs out +@param[in] log_bloc log segment +@param[in] checkpoint_lsn latest checkpoint LSN +@param[in] start_lsn buffer start LSN +@param[in] end_lsn buffer end LSN +@param[in,out] contiguous_lsn it is known that all log groups contain + contiguous log data upto this LSN +@param[out] group_scanned_lsn scanning succeeded up to this LSN @return true if not able to scan any more in this log group */ static bool recv_scan_log_recs( -/*===============*/ - ulint available_memory,/*!< in: we let the hash table of recs - to grow to this size, at the maximum */ - store_t* store_to_hash, /*!< in,out: whether the records should be - stored to the hash table; this is reset - if just debug checking is needed, or - when the available_memory runs out */ - const byte* log_block, /*!< in: log segment */ - lsn_t checkpoint_lsn, /*!< in: latest checkpoint LSN */ - lsn_t start_lsn, /*!< in: buffer start LSN */ - lsn_t end_lsn, /*!< in: buffer end LSN */ - lsn_t* contiguous_lsn, /*!< in/out: it is known that all log - groups contain contiguous log data up - to this lsn */ - lsn_t* group_scanned_lsn)/*!< out: scanning succeeded up to - this lsn */ + ulint available_memory, + store_t* store_to_hash, + const byte* log_block, + lsn_t checkpoint_lsn, + lsn_t start_lsn, + lsn_t end_lsn, + lsn_t* contiguous_lsn, + lsn_t* group_scanned_lsn) { lsn_t scanned_lsn = start_lsn; bool finished = false; @@ -2851,6 +2863,7 @@ recv_scan_log_recs( if (*store_to_hash != STORE_NO && mem_heap_get_size(recv_sys->heap) > available_memory) { + last_stored_lsn = recv_sys->recovered_lsn; *store_to_hash = STORE_NO; } @@ -2983,15 +2996,102 @@ recv_init_missing_space(dberr_t err, const recv_spaces_t::const_iterator& i) return(err); } +/** Report the missing tablespace and discard the redo logs for the deleted +tablespace. +@param[in] rescan rescan of redo logs is needed if hash table + ran out of memory +@return error code or DB_SUCCESS. 
*/ +static MY_ATTRIBUTE((warn_unused_result)) +dberr_t +recv_validate_tablespace(bool rescan) +{ + dberr_t err = DB_SUCCESS; + + for (ulint h = 0; h < hash_get_n_cells(recv_sys->addr_hash); h++) { + + for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>( + HASH_GET_FIRST(recv_sys->addr_hash, h)); + recv_addr != 0; + recv_addr = static_cast<recv_addr_t*>( + HASH_GET_NEXT(addr_hash, recv_addr))) { + + const ulint space = recv_addr->space; + + if (is_predefined_tablespace(space)) { + continue; + } + + recv_spaces_t::iterator i + = recv_spaces.find(space); + ut_ad(i != recv_spaces.end()); + + if (i->second.deleted) { + ut_ad(missing_spaces.find(space) + == missing_spaces.end()); + recv_addr->state = RECV_DISCARDED; + continue; + } + + space_set_t::iterator m = missing_spaces.find( + space); + + if (m != missing_spaces.end()) { + missing_spaces.erase(m); + err = recv_init_missing_space(err, i); + recv_addr->state = RECV_DISCARDED; + /* All further redo log for this + tablespace should be removed. */ + i->second.deleted = true; + } + } + } + + if (err != DB_SUCCESS) { + return(err); + } + + /** When rescan is not needed then recv_sys->addr_hash will have + all space id belongs to redo log. If rescan is needed and + innodb_force_recovery > 0 then InnoDB can ignore missing tablespace. */ + if ((rescan && srv_force_recovery > 0) || !rescan) { + + for (space_set_t::const_iterator m = missing_spaces.begin(); + m != missing_spaces.end(); m++) { + + recv_spaces_t::iterator i = recv_spaces.find(*m); + ut_ad(i != recv_spaces.end()); + + if (rescan && srv_force_recovery > 0) { + ib::warn() << "Tablespace " << i->first + <<" was not found at " << i->second.name + <<", and innodb_force_recovery was set." 
+ <<" All redo log for this tablespace" + <<" will be ignored!"; + continue; + } + + if (!rescan) { + ib::info() << "Tablespace " << i->first + << " was not found at '" + << i->second.name << "', but there" + << " were no modifications either."; + } + } + + missing_spaces.clear(); + } + + return DB_SUCCESS; +} + /** Check if all tablespaces were found for crash recovery. +@param[in] rescan rescan of redo log is needed. @return error code or DB_SUCCESS */ static MY_ATTRIBUTE((warn_unused_result)) dberr_t -recv_init_crash_recovery_spaces() +recv_init_crash_recovery_spaces(bool rescan) { - typedef std::set<ulint> space_set_t; bool flag_deleted = false; - space_set_t missing_spaces; ut_ad(!srv_read_only_mode); ut_ad(recv_needed_recovery); @@ -3025,76 +3125,10 @@ recv_init_crash_recovery_spaces() } if (flag_deleted) { - dberr_t err = DB_SUCCESS; - - for (ulint h = 0; - h < hash_get_n_cells(recv_sys->addr_hash); - h++) { - for (recv_addr_t* recv_addr - = static_cast<recv_addr_t*>( - HASH_GET_FIRST( - recv_sys->addr_hash, h)); - recv_addr != 0; - recv_addr = static_cast<recv_addr_t*>( - HASH_GET_NEXT(addr_hash, recv_addr))) { - const ulint space = recv_addr->space; - - if (is_predefined_tablespace(space)) { - continue; - } - - recv_spaces_t::iterator i - = recv_spaces.find(space); - ut_ad(i != recv_spaces.end()); - - if (i->second.deleted) { - ut_ad(missing_spaces.find(space) - == missing_spaces.end()); - recv_addr->state = RECV_DISCARDED; - continue; - } - - space_set_t::iterator m = missing_spaces.find( - space); - - if (m != missing_spaces.end()) { - missing_spaces.erase(m); - err = recv_init_missing_space(err, i); - recv_addr->state = RECV_DISCARDED; - /* All further redo log for this - tablespace should be removed. 
*/ - i->second.deleted = true; - } - } - } - - if (err != DB_SUCCESS) { - return(err); - } - } - - for (space_set_t::const_iterator m = missing_spaces.begin(); - m != missing_spaces.end(); m++) { - recv_spaces_t::iterator i = recv_spaces.find(*m); - ut_ad(i != recv_spaces.end()); - - ib::info() << "Tablespace " << i->first - << " was not found at '" << i->second.name - << "', but there were no modifications either."; + return recv_validate_tablespace(rescan); } - if (srv_operation == SRV_OPERATION_NORMAL) { - buf_dblwr_process(); - } - - if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - /* Spawn the background thread to flush dirty pages - from the buffer pools. */ - recv_writer_thread_active = true; - os_thread_create(recv_writer_thread, 0, 0); - } - - return(DB_SUCCESS); + return DB_SUCCESS; } /** Start recovering from a redo log checkpoint. @@ -3268,13 +3302,53 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) log_sys->lsn = recv_sys->recovered_lsn; if (recv_needed_recovery) { - err = recv_init_crash_recovery_spaces(); + err = recv_init_crash_recovery_spaces(rescan); if (err != DB_SUCCESS) { log_mutex_exit(); return(err); } + /* If there is any missing tablespace and rescan is needed + then there is possiblity that hash table will not contain all + redo log belongs to tablespace id. Rescan the remaining + unstored redo logs and validate the missing tablespace. 
*/ + if (missing_spaces.size() > 0) { + + ut_ad(rescan); + + while(rescan) { + lsn_t recent_stored_lsn= last_stored_lsn; + + rescan = recv_group_scan_log_recs( + group, checkpoint_lsn, &recent_stored_lsn, + false); + + ut_ad(recv_sys->found_corrupt_log != true); + ut_ad(recv_sys->found_corrupt_fs != true); + + err = recv_validate_tablespace(rescan); + + if (err != DB_SUCCESS) { + log_mutex_exit(); + return err; + } + } + + missing_spaces.clear(); + } + + if (srv_operation == SRV_OPERATION_NORMAL) { + buf_dblwr_process(); + } + + if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { + /* Spawn the background thread to flush dirty pages + from the buffer pools. */ + recv_writer_thread_active = true; + os_thread_create(recv_writer_thread, 0, 0); + } + if (rescan) { contiguous_lsn = checkpoint_lsn; |