diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2021-05-17 18:12:33 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2021-05-17 18:12:33 +0300 |
commit | 86dc7b4d4cfe15a2d37f8b5f60c4fce5dba9491d (patch) | |
tree | b23fd8a5ee30469841cf0013adf6cf4e892d56c8 | |
parent | c290c0d7e0d8184611d878be2adc7cd62ca773ef (diff) | |
download | mariadb-git-86dc7b4d4cfe15a2d37f8b5f60c4fce5dba9491d.tar.gz |
MDEV-24626 Remove synchronous write of page0 file during file creation
During data file creation, InnoDB holds dict_sys mutex, tries to
write page 0 of the file and flushes the file. This not only causes
unnecessary contention but is also a deviation from the write-ahead
logging protocol.
The clean sequence of operations is that we first start a dictionary
transaction and write SYS_TABLES and SYS_INDEXES records that identify
the tablespace. Then, we durably write a FILE_CREATE record to the
write-ahead log and create the file.
Recovery should not unnecessarily insist that the first page of each
data file that is referred to by the redo log is valid. It must be
enough that page 0 of the tablespace can be initialized based on the
redo log contents.
We introduce a new data structure deferred_spaces that keeps track
of corrupted-looking files during recovery. The data structure holds
the last LSN of a FILE_ record referring to the data file, the
tablespace identifier, and the last known file name.
There are two scenarios that can happen during recovery:
i) Sufficient memory: InnoDB can reconstruct the
tablespace after parsing all redo log records.
ii) Insufficient memory (multiple apply phases): InnoDB should
store the deferred tablespace redo logs even though the
tablespace is not present. InnoDB should start constructing
the tablespace when it first encounters the deferred tablespace
id.
Mariabackup copies the zero-filled .ibd file in backup_fix_ddl() to a
file with the .new extension. The Mariabackup test cases perform page
flushing when they deal with DDL operations during a backup.
fil_ibd_create(): Remove the write of page0 and flushing of file
fil_ibd_load(): Return FIL_LOAD_DEFER if the tablespace has
zero filled page0
Datafile: Clean up the error handling, and do not report errors
if we are in the middle of recovery. The caller will check
Datafile::m_defer.
fil_node_t::deferred: Indicates whether the tablespace loading was
deferred during recovery
FIL_LOAD_DEFER: Returned by fil_ibd_load() to indicate that tablespace
file cannot be loaded.
recv_sys_t::recover_deferred(): Invoke deferred_spaces.create() to
initialize fil_space_t based on buffered metadata and records to
initialize page 0. Ignore the flags in fil_name_t, because they are
intentionally invalid.
fil_name_process(): Update deferred_spaces.
recv_sys_t::parse(): Store the redo log if the tablespace id
is present in deferred spaces
recv_sys_t::recover_low(): Should recover the first page of
the tablespace even though the tablespace instance is not
present
recv_sys_t::apply(): Initialize the deferred tablespace
before applying the deferred tablespace records
recv_validate_tablespace(): Skip the validation for deferred_spaces.
recv_rename_files(): Moved and revised from recv_sys_t::apply().
For deferred-recovery tablespaces, do not attempt to rename the
file if a deferred-recovery tablespace is associated with the name.
recv_recovery_from_checkpoint_start(): Invoke recv_rename_files()
and initialize all deferred tablespaces before applying redo log.
fil_node_t::read_page0(): Skip page0 validation if the tablespace
is deferred
buf_page_create_deferred(): A variant of buf_page_create() when
the fil_space_t is not available yet
This is joint work with Thirunarayanan Balathandayuthapani,
who implemented an initial prototype.
38 files changed, 720 insertions, 369 deletions
diff --git a/extra/mariabackup/fil_cur.cc b/extra/mariabackup/fil_cur.cc index c9a0ce2a7fc..824fb4f4232 100644 --- a/extra/mariabackup/fil_cur.cc +++ b/extra/mariabackup/fil_cur.cc @@ -364,6 +364,7 @@ xb_fil_cur_result_t xb_fil_cur_read(xb_fil_cur_t* cursor, ib_int64_t offset; ib_int64_t to_read; const ulint page_size = cursor->page_size; + bool defer = false; xb_ad(!cursor->is_system() || page_size == srv_page_size); cursor->read_filter->get_next_batch(&cursor->read_filter_ctxt, @@ -418,13 +419,15 @@ read_retry: ret = XB_FIL_CUR_ERROR; goto func_exit; } + + defer = space->is_deferred(); /* check pages for corruption and re-read if necessary. i.e. in case of partially written pages */ for (page = cursor->buf, i = 0; i < npages; page += page_size, i++) { unsigned page_no = cursor->buf_page_no + i; - if (page_is_corrupted(page, page_no, cursor, space)){ + if (!defer && page_is_corrupted(page, page_no, cursor, space)) { retry_count--; if (retry_count == 0) { diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 4a7bae7fb41..60958eb05ae 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -510,7 +510,8 @@ bool CorruptedPages::empty() const } static void xb_load_single_table_tablespace(const std::string &space_name, - bool set_size); + bool set_size, + ulint defer_space_id=0); static void xb_data_files_close(); static fil_space_t* fil_space_get_by_name(const char* name); @@ -587,7 +588,8 @@ xtrabackup_add_datasink(ds_ctxt_t *ds) typedef void (*process_single_tablespace_func_t)(const char *dirname, const char *filname, bool is_remote, - bool skip_node_page0); + bool skip_node_page0, + ulint defer_space_id); static dberr_t enumerate_ibd_files(process_single_tablespace_func_t callback); /* ======== Datafiles iterator ======== */ @@ -1680,7 +1682,8 @@ debug_sync_point(const char *name) static std::set<std::string> tables_for_export; static void append_export_table(const char *dbname, const char *tablename, - 
bool is_remote, bool skip_node_page0) + bool is_remote, bool skip_node_page0, + ulint defer_space_id) { if(dbname && tablename && !is_remote) { @@ -3271,11 +3274,14 @@ xb_fil_io_init() node page0 will be read, and it's size and free pages limit will be set from page 0, what is neccessary for checking and fixing corrupted pages. +@param[in] defer_space_id use the space id to create space object +when there is deferred tablespace */ static void xb_load_single_table_tablespace(const char *dirname, const char *filname, bool is_remote, - bool skip_node_page0) + bool skip_node_page0, + ulint defer_space_id) { ut_ad(srv_operation == SRV_OPERATION_BACKUP || srv_operation == SRV_OPERATION_RESTORE_DELTA @@ -3298,6 +3304,7 @@ static void xb_load_single_table_tablespace(const char *dirname, lsn_t flush_lsn; dberr_t err; fil_space_t *space; + bool defer = false; name = static_cast<char*>(ut_malloc_nokey(pathlen)); @@ -3329,14 +3336,30 @@ static void xb_load_single_table_tablespace(const char *dirname, } for (int i = 0; i < 10; i++) { + file->m_defer = false; err = file->validate_first_page(&flush_lsn); - if (err != DB_CORRUPTION) { + + if (file->m_defer) { + if (defer_space_id) { + defer = true; + file->set_space_id(defer_space_id); + file->set_flags(FSP_FLAGS_PAGE_SSIZE()); + err = DB_SUCCESS; + break; + } + } else if (err != DB_CORRUPTION) { break; } my_sleep(1000); } + if (!defer && file->m_defer) { + delete file; + ut_free(name); + return; + } + bool is_empty_file = file->exists() && file->is_empty_file(); if (err == DB_SUCCESS && file->space_id() != SRV_TMP_SPACE_ID) { @@ -3345,9 +3368,11 @@ static void xb_load_single_table_tablespace(const char *dirname, FIL_TYPE_TABLESPACE, NULL/* TODO: crypt_data */); ut_a(space != NULL); - space->add(file->filepath(), - skip_node_page0 ? file->detach() : pfs_os_file_t(), - 0, false, false); + fil_node_t* node= space->add( + file->filepath(), + skip_node_page0 ? 
file->detach() : pfs_os_file_t(), + 0, false, false); + node->deferred= defer; mysql_mutex_lock(&fil_system.mutex); space->read_page0(); mysql_mutex_unlock(&fil_system.mutex); @@ -3368,7 +3393,8 @@ static void xb_load_single_table_tablespace(const char *dirname, } static void xb_load_single_table_tablespace(const std::string &space_name, - bool skip_node_page0) + bool skip_node_page0, + ulint defer_space_id) { std::string name(space_name); bool is_remote= access((name + ".ibd").c_str(), R_OK) != 0; @@ -3379,14 +3405,13 @@ static void xb_load_single_table_tablespace(const std::string &space_name, buf[sizeof buf - 1]= '\0'; const char *dbname= buf; char *p= strchr(buf, '/'); - if (p == 0) + if (!p) die("Unexpected tablespace %s filename %s", space_name.c_str(), name.c_str()); - ut_a(p); *p= 0; const char *tablename= p + 1; xb_load_single_table_tablespace(dbname, tablename, is_remote, - skip_node_page0); + skip_node_page0, defer_space_id); } /** Scan the database directories under the MySQL datadir, looking for @@ -3425,12 +3450,11 @@ static dberr_t enumerate_ibd_files(process_single_tablespace_func_t callback) /* General tablespaces are always at the first level of the data home dir */ - if (dbinfo.type == OS_FILE_TYPE_FILE) { - bool is_isl = ends_with(dbinfo.name, ".isl"); - bool is_ibd = !is_isl && ends_with(dbinfo.name,".ibd"); - - if (is_isl || is_ibd) { - (*callback)(NULL, dbinfo.name, is_isl, false); + if (dbinfo.type != OS_FILE_TYPE_FILE) { + const bool is_isl = ends_with(dbinfo.name, ".isl"); + if (is_isl || ends_with(dbinfo.name,".ibd")) { + (*callback)(nullptr, dbinfo.name, is_isl, + false, 0); } } @@ -3486,7 +3510,7 @@ static dberr_t enumerate_ibd_files(process_single_tablespace_func_t callback) if (strlen(fileinfo.name) > 4) { bool is_isl= false; if (ends_with(fileinfo.name, ".ibd") || ((is_isl = ends_with(fileinfo.name, ".isl")))) - (*callback)(dbinfo.name, fileinfo.name, is_isl, false); + (*callback)(dbinfo.name, fileinfo.name, is_isl, false, 0); } } @@ 
-4567,9 +4591,9 @@ FTWRL. This ensures consistent backup in presence of DDL. */ void backup_fix_ddl(CorruptedPages &corrupted_pages) { - std::set<std::string> new_tables; std::set<std::string> dropped_tables; std::map<std::string, std::string> renamed_tables; + space_id_to_name_t new_tables; /* Disable further DDL on backed up tables (only needed for --no-lock).*/ pthread_mutex_lock(&backup_mutex); @@ -4619,7 +4643,7 @@ void backup_fix_ddl(CorruptedPages &corrupted_pages) if (ddl_tracker.drops.find(id) == ddl_tracker.drops.end()) { dropped_tables.erase(name); - new_tables.insert(name); + new_tables[id] = name; if (opt_log_innodb_page_corruption) corrupted_pages.drop_space(id); } @@ -4661,12 +4685,12 @@ void backup_fix_ddl(CorruptedPages &corrupted_pages) } DBUG_EXECUTE_IF("check_mdl_lock_works", DBUG_ASSERT(new_tables.size() == 0);); - for (std::set<std::string>::iterator iter = new_tables.begin(); - iter != new_tables.end(); iter++) { - const char *space_name = iter->c_str(); - if (check_if_skip_table(space_name)) - continue; - xb_load_single_table_tablespace(*iter, false); + + for (const auto &t : new_tables) { + if (!check_if_skip_table(t.second.c_str())) { + xb_load_single_table_tablespace(t.second, false, + t.first); + } } datafiles_iter_t it2; @@ -4677,6 +4701,7 @@ void backup_fix_ddl(CorruptedPages &corrupted_pages) std::string dest_name= filename_to_spacename( node->name, strlen(node->name)); dest_name.append(".new"); + xtrabackup_copy_datafile(node, 0, dest_name.c_str(), wf_write_through, corrupted_pages); } diff --git a/mysql-test/suite/innodb/r/log_file_name.result b/mysql-test/suite/innodb/r/log_file_name.result index 10f27163372..8a22615eae0 100644 --- a/mysql-test/suite/innodb/r/log_file_name.result +++ b/mysql-test/suite/innodb/r/log_file_name.result @@ -1,6 +1,7 @@ SET GLOBAL innodb_file_per_table=ON; FLUSH TABLES; CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB; +# restart CREATE TABLE t3(a INT PRIMARY KEY) ENGINE=InnoDB; BEGIN; INSERT INTO t3 
VALUES (33101),(347); @@ -31,7 +32,7 @@ WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS FOUND 1 /InnoDB: Ignoring data file '.*t[23].ibd' with space ID/ in mysqld.1.err -FOUND 1 /InnoDB: Tablespace \d+ was not found at .*t1.ibd/ in mysqld.1.err +NOT FOUND /InnoDB: Tablespace \d+ was not found at .*t1.ibd/ in mysqld.1.err FOUND 1 /InnoDB: Tablespace \d+ was not found at .*t3.ibd/ in mysqld.1.err FOUND 2 /InnoDB: Set innodb_force_recovery=1 to ignore this and to permanently lose all changes to the tablespace/ in mysqld.1.err # Fault 4: Missing data file @@ -54,7 +55,7 @@ WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS NOT FOUND /\[Note\] InnoDB: Cannot read first page of .*t2.ibd/ in mysqld.1.err -FOUND 1 /\[ERROR\] InnoDB: Datafile .*t2.*\. Cannot determine the space ID from the first 64 pages/ in mysqld.1.err +FOUND 1 /.*\[ERROR\] InnoDB: Cannot apply log to \[page id: space=[1-9][0-9]*, page number=3\] of corrupted file './test/t2\.ibd'/ in mysqld.1.err # restart SELECT * FROM t2; a @@ -85,27 +86,6 @@ INSERT INTO u6 VALUES(2); # Kill the server # Fault 6: All-zero data file and innodb_force_recovery # restart: --innodb-force-recovery=1 -SELECT * FROM INFORMATION_SCHEMA.ENGINES -WHERE engine = 'innodb' -AND support IN ('YES', 'DEFAULT', 'ENABLED'); -ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS -FOUND 1 /\[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd/ in mysqld.1.err -FOUND 1 /\[ERROR\] InnoDB: Datafile .*u1.*\. 
Cannot determine the space ID from the first 64 pages/ in mysqld.1.err -NOT FOUND /\[Note\] InnoDB: Cannot read first page of .*u2.ibd/ in mysqld.1.err -# Fault 7: Missing or wrong data file and innodb_force_recovery -# restart: --innodb-force-recovery=1 -SELECT * FROM INFORMATION_SCHEMA.ENGINES -WHERE engine = 'innodb' -AND support IN ('YES', 'DEFAULT', 'ENABLED'); -ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS -FOUND 1 /\[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd/ in mysqld.1.err -FOUND 1 /InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace/ in mysqld.1.err -FOUND 1 /\[ERROR\] InnoDB: Cannot replay rename of tablespace \d+ from '.*u4.ibd' to '.*u6.ibd' because the target file exists/ in mysqld.1.err -# restart: --innodb-force-recovery=1 -FOUND 1 /\[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd/ in mysqld.1.err -FOUND 1 /InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace/ in mysqld.1.err -FOUND 1 /\[Warning\] InnoDB: Tablespace \d+ was not found at .*u[1-5].ibd, and innodb_force_recovery was set. All redo log for this tablespace will be ignored!/ in mysqld.1.err -# restart DROP TABLE u1,u2,u3,u6; # List of files: db.opt diff --git a/mysql-test/suite/innodb/t/log_file_name.test b/mysql-test/suite/innodb/t/log_file_name.test index 1d0a0b35665..f6a32b93dd9 100644 --- a/mysql-test/suite/innodb/t/log_file_name.test +++ b/mysql-test/suite/innodb/t/log_file_name.test @@ -12,6 +12,7 @@ FLUSH TABLES; CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB; +--source include/restart_mysqld.inc --source include/no_checkpoint_start.inc CREATE TABLE t3(a INT PRIMARY KEY) ENGINE=InnoDB; @@ -120,7 +121,7 @@ eval $check_no_innodb; let SEARCH_PATTERN= \[Note\] InnoDB: Cannot read first page of .*t2.ibd; --source include/search_pattern_in_file.inc -let SEARCH_PATTERN= \[ERROR\] InnoDB: Datafile .*t2.*\. 
Cannot determine the space ID from the first 64 pages; +let SEARCH_PATTERN= .*\[ERROR\] InnoDB: Cannot apply log to \\[page id: space=[1-9][0-9]*, page number=3\\] of corrupted file './test/t2\\.ibd'; --source include/search_pattern_in_file.inc # Restore t2.ibd @@ -150,13 +151,15 @@ call mtr.add_suppression("InnoDB: Cannot open datafile for read-write: '.*t2\.ib # The following are for aborted startup without --innodb-force-recovery: call mtr.add_suppression("InnoDB: Tablespace .* was not found at .*test"); call mtr.add_suppression("InnoDB: Set innodb_force_recovery=1 to ignore this and to permanently lose all changes to the tablespace"); -call mtr.add_suppression("InnoDB: Cannot read first page of '.*test.[tu]2.ibd' I/O error"); +call mtr.add_suppression("InnoDB: Cannot read first page of '.*test.[tu]2.ibd': I/O error"); +call mtr.add_suppression("InnoDB: Cannot apply log to \\[page id: space=[1-9][0-9]*, page number=3\\] of corrupted file './test/t2\\.ibd'"); call mtr.add_suppression("InnoDB: Datafile '.*test.*ibd' is corrupted"); call mtr.add_suppression("InnoDB: Cannot replay file rename. 
Remove either file and try again"); call mtr.add_suppression("InnoDB: Cannot rename.*because the target file exists"); call mtr.add_suppression("InnoDB: Log scan aborted at LSN"); # The following are for the --innodb-force-recovery=1 with broken u* tables: -call mtr.add_suppression("InnoDB: The size of the file .*u1\\.ibd is only 16384 bytes, should be at least 65536"); +call mtr.add_suppression("InnoDB: The size of the file .*u[12]\\.ibd is only [1-9][0-9]* bytes, should be at least 65536"); +call mtr.add_suppression("InnoDB: The size of tablespace file '.*test/u[12].ibd' is only"); call mtr.add_suppression("InnoDB: The error means the system cannot find the path specified"); call mtr.add_suppression("InnoDB: .*you must create directories"); call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: '.*u[1-5]\.ibd'"); @@ -199,69 +202,14 @@ EOF --exec echo "" > $MYSQLD_DATADIR/test/u2.ibd -# TODO: Test with this, once -# Bug#18131883 IMPROVE INNODB ERROR MESSAGES REGARDING FILES -# has been fixed: -#--mkdir $MYSQLD_DATADIR/test/u3.ibd - --copy_file $MYSQLD_DATADIR/test/u6.ibd $MYSQLD_DATADIR/test/u4.ibd --let $restart_parameters= --innodb-force-recovery=1 --source include/start_mysqld.inc -eval $check_no_innodb; - -let SEARCH_PATTERN= \[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd; ---source include/search_pattern_in_file.inc - -let SEARCH_PATTERN= \[ERROR\] InnoDB: Datafile .*u1.*\. Cannot determine the space ID from the first 64 pages; ---source include/search_pattern_in_file.inc - -# TODO: These errors should state the file name (u2.ibd) and be ignored -# in innodb-force-recovery mode once -# Bug#18131883 IMPROVE INNODB ERROR MESSAGES REGARDING FILES -# has been fixed: -let SEARCH_PATTERN= \[Note\] InnoDB: Cannot read first page of .*u2.ibd; ---source include/search_pattern_in_file.inc - ---source include/shutdown_mysqld.inc - -# Allow --innodb-force-recovery to start despite the broken file. 
-# TODO: Remove this workaround, and make --innodb-force-recovery=1 -# ignore the broken file. ---remove_file $MYSQLD_DATADIR/test/u2.ibd - ---echo # Fault 7: Missing or wrong data file and innodb_force_recovery - ---source include/start_mysqld.inc -eval $check_no_innodb; - -let SEARCH_PATTERN= \[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd; ---source include/search_pattern_in_file.inc - -let SEARCH_PATTERN= InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace; ---source include/search_pattern_in_file.inc - -let SEARCH_PATTERN= \[ERROR\] InnoDB: Cannot replay rename of tablespace \d+ from '.*u4.ibd' to '.*u6.ibd' because the target file exists; ---source include/search_pattern_in_file.inc - ---remove_file $MYSQLD_DATADIR/test/u6.ibd - ---source include/restart_mysqld.inc - -let SEARCH_PATTERN= \[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd; ---source include/search_pattern_in_file.inc - -let SEARCH_PATTERN= InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace; ---source include/search_pattern_in_file.inc - -let SEARCH_PATTERN= \[Warning\] InnoDB: Tablespace \d+ was not found at .*u[1-5].ibd, and innodb_force_recovery was set. 
All redo log for this tablespace will be ignored!; ---source include/search_pattern_in_file.inc - ---let $restart_parameters= ---source include/restart_mysqld.inc - DROP TABLE u1,u2,u3,u6; +--remove_file $MYSQLD_DATADIR/test/u4.ibd + --echo # List of files: --list_files $MYSQLD_DATADIR/test diff --git a/mysql-test/suite/innodb_fts/r/crash_recovery.result b/mysql-test/suite/innodb_fts/r/crash_recovery.result index 104b06d8636..518e5007048 100644 --- a/mysql-test/suite/innodb_fts/r/crash_recovery.result +++ b/mysql-test/suite/innodb_fts/r/crash_recovery.result @@ -29,7 +29,7 @@ connect ddl3, localhost, root,,; CREATE TABLE t3(a TEXT,b TEXT,FULLTEXT INDEX(a)) ENGINE=InnoDB; ALTER TABLE t3 DROP INDEX a, ADD FULLTEXT INDEX(b), ALGORITHM=COPY; connection default; -# restart: with restart_parameters +# restart disconnect ddl1; disconnect ddl2; disconnect ddl3; @@ -69,7 +69,7 @@ DELETE FROM articles LIMIT 1; ROLLBACK; disconnect flush_redo_log; connection default; -# restart: with restart_parameters +# restart disconnect dml; INSERT INTO articles (title,body) VALUES ('MySQL Tutorial','DBMS stands for DataBase ...'); @@ -129,7 +129,7 @@ id title body 1 MySQL Tutorial DBMS stands for Database... 2 MariaDB Tutorial DB means Database ... 
connection default; -# restart: with restart_parameters +# restart disconnect dml; disconnect dml2; INSERT INTO articles VALUES (8, 12, 'MySQL Tutorial','DBMS stands for DataBase ...'); diff --git a/mysql-test/suite/innodb_fts/t/crash_recovery.test b/mysql-test/suite/innodb_fts/t/crash_recovery.test index 3c3a41c7b2a..f0fbab1092f 100644 --- a/mysql-test/suite/innodb_fts/t/crash_recovery.test +++ b/mysql-test/suite/innodb_fts/t/crash_recovery.test @@ -93,16 +93,6 @@ SET DEBUG_SYNC='now WAIT_FOR 3'; --enable_query_log } -if (!$have_debug) -{ -# Work around the lack of MDEV-24626 -let $restart_parameters=--innodb-force-recovery=1; -} -if ($have_debug) -{ -let $restart_parameters=--innodb-force-recovery=0; -} -let $restart_noprint=1; let $shutdown_timeout=0; --source include/restart_mysqld.inc @@ -301,16 +291,15 @@ call mtr.add_suppression("InnoDB indexes are inconsistent with what defined in \ call mtr.add_suppression("InnoDB could not find key no [01] with name [ab] from dict cache for table test/t[123]"); call mtr.add_suppression("InnoDB: Table test/t[123] contains .* indexes inside InnoDB"); call mtr.add_suppression("InnoDB: Table `test`\\.`t3` does not exist"); +# MDEV-24626 FIXME: a 0-sized file will not be deleted! +--list_files $datadir/test #sql-alter-*.ibd -# Work around the lack of MDEV-24626 as well. ---remove_files_wildcard $datadir/test #sql-alter-*.ibd ---remove_files_wildcard $datadir/test #sql-backup-*.ibd +# Some errors are reported despite the MDEV-24626 fix. 
call mtr.add_suppression("InnoDB: Cannot (read first page of|open datafile for read-only:) '\\./test/(FTS_|#sql-(alter|backup)-).*\\.ibd'"); call mtr.add_suppression("InnoDB: Datafile '\\./test/(FTS_|#sql-(alter|backup)-).*\\.ibd' is corrupted"); call mtr.add_suppression("InnoDB: (The error means|Operating system error)"); call mtr.add_suppression("InnoDB: Ignoring tablespace for `test`\\.`(FTS_|#sql-(backup|alter)-).*` because it could not be opened\\."); -call mtr.add_suppression("InnoDB: Tablespace [1-9][0-9]* was not found at ./test/(FTS_|#sql-(alter|backup)-).*\\.ibd, and innodb_force_recovery was set"); -call mtr.add_suppression("InnoDB: Corrupted page \\[page id: space=[1-9][0-9]*, page number=0\\] of datafile './test/(FTS_|#sql-(alter|backup)-).*\\.ibd' could not be found in the doublewrite buffer\\."); +call mtr.add_suppression("InnoDB: Expected tablespace id [1-9][0-9]* but found 0 in the file .*/test/(FTS_|#sql-(alter|backup)-).*\\.ibd"); --enable_query_log } --remove_files_wildcard $datadir/test #sql-*.frm diff --git a/mysql-test/suite/mariabackup/big_innodb_log.result b/mysql-test/suite/mariabackup/big_innodb_log.result index 7bd5d20049d..b9b6b6afce3 100644 --- a/mysql-test/suite/mariabackup/big_innodb_log.result +++ b/mysql-test/suite/mariabackup/big_innodb_log.result @@ -12,6 +12,7 @@ INSERT INTO t VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (0), (1), (2), (3), (4), (5), (6), (7), (8), (9); +set global innodb_log_checkpoint_now = 1; # xtrabackup backup, execute the following query after test.t is copied: # BEGIN NOT ATOMIC INSERT INTO test.t SELECT * FROM test.t; UPDATE test.t SET i = 10 WHERE i = 0; DELETE FROM test.t WHERE i = 1; END SELECT count(*) FROM t WHERE i = 0; diff --git a/mysql-test/suite/mariabackup/big_innodb_log.test b/mysql-test/suite/mariabackup/big_innodb_log.test index 4a87ecb18fe..247e7179c42 100644 --- a/mysql-test/suite/mariabackup/big_innodb_log.test +++ 
b/mysql-test/suite/mariabackup/big_innodb_log.test @@ -32,6 +32,7 @@ INSERT INTO t VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (0), (1), (2), (3), (4), (5), (6), (7), (8), (9); +set global innodb_log_checkpoint_now = 1; --let after_copy_test_t=BEGIN NOT ATOMIC INSERT INTO test.t SELECT * FROM test.t; UPDATE test.t SET i = 10 WHERE i = 0; DELETE FROM test.t WHERE i = 1; END --echo # xtrabackup backup, execute the following query after test.t is copied: diff --git a/mysql-test/suite/mariabackup/drop_table_during_backup.result b/mysql-test/suite/mariabackup/drop_table_during_backup.result index a0fa9db5b94..dfcde706ba8 100644 --- a/mysql-test/suite/mariabackup/drop_table_during_backup.result +++ b/mysql-test/suite/mariabackup/drop_table_during_backup.result @@ -3,6 +3,7 @@ CREATE TABLE t2 (i int) ENGINE=INNODB; CREATE TABLE t3 (i int) ENGINE=INNODB; CREATE TABLE t4 (i int) ENGINE=INNODB; CREATE TABLE t5 (i int) ENGINE=INNODB; +set global innodb_log_checkpoint_now=1; # xtrabackup prepare # shutdown server # remove datadir diff --git a/mysql-test/suite/mariabackup/drop_table_during_backup.test b/mysql-test/suite/mariabackup/drop_table_during_backup.test index e3a81b77b71..2ac82945ffe 100644 --- a/mysql-test/suite/mariabackup/drop_table_during_backup.test +++ b/mysql-test/suite/mariabackup/drop_table_during_backup.test @@ -6,6 +6,8 @@ CREATE TABLE t3 (i int) ENGINE=INNODB; CREATE TABLE t4 (i int) ENGINE=INNODB; CREATE TABLE t5 (i int) ENGINE=INNODB; +set global innodb_log_checkpoint_now=1; + --let before_copy_test_t1=DROP TABLE test.t1 --let after_copy_test_t2=DROP TABLE test.t2; # MDEV-18185, drop + rename combination diff --git a/mysql-test/suite/mariabackup/incremental_ddl_during_backup.result b/mysql-test/suite/mariabackup/incremental_ddl_during_backup.result index 33a3b0001a1..ab5f237b2a9 100644 --- a/mysql-test/suite/mariabackup/incremental_ddl_during_backup.result +++ b/mysql-test/suite/mariabackup/incremental_ddl_during_backup.result @@ -3,6 +3,7 @@ 
CREATE TABLE t1(i INT PRIMARY KEY) ENGINE INNODB; CREATE TABLE t2(i INT PRIMARY KEY) ENGINE INNODB; CREATE TABLE t3(i INT) ENGINE INNODB; CREATE TABLE t10(i INT PRIMARY KEY) ENGINE INNODB; +set global innodb_log_checkpoint_now = 1; # Create full backup , modify table, then create incremental/differential backup INSERT into t1 values(1); # Prepare full backup, apply incremental one diff --git a/mysql-test/suite/mariabackup/incremental_ddl_during_backup.test b/mysql-test/suite/mariabackup/incremental_ddl_during_backup.test index ebdb2137523..d7ba15c28ae 100644 --- a/mysql-test/suite/mariabackup/incremental_ddl_during_backup.test +++ b/mysql-test/suite/mariabackup/incremental_ddl_during_backup.test @@ -10,6 +10,8 @@ CREATE TABLE t2(i INT PRIMARY KEY) ENGINE INNODB; CREATE TABLE t3(i INT) ENGINE INNODB; CREATE TABLE t10(i INT PRIMARY KEY) ENGINE INNODB; +set global innodb_log_checkpoint_now = 1; + echo # Create full backup , modify table, then create incremental/differential backup; --disable_result_log exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir; diff --git a/mysql-test/suite/mariabackup/lock_ddl_per_table.result b/mysql-test/suite/mariabackup/lock_ddl_per_table.result index 434b6852530..c1b28e46071 100644 --- a/mysql-test/suite/mariabackup/lock_ddl_per_table.result +++ b/mysql-test/suite/mariabackup/lock_ddl_per_table.result @@ -7,5 +7,6 @@ PARTITION p1 VALUES LESS THAN (1995), PARTITION p2 VALUES LESS THAN (2000), PARTITION p3 VALUES LESS THAN (2005) ) ; +set global innodb_log_checkpoint_now = 1; DROP TABLE t; DROP TABLE `bobby``tables`; diff --git a/mysql-test/suite/mariabackup/lock_ddl_per_table.test b/mysql-test/suite/mariabackup/lock_ddl_per_table.test index 2689508e554..18c207718b5 100644 --- a/mysql-test/suite/mariabackup/lock_ddl_per_table.test +++ b/mysql-test/suite/mariabackup/lock_ddl_per_table.test @@ -13,9 +13,11 @@ CREATE TABLE `bobby``tables` (id INT, name VARCHAR(50), purchased DATE) ENGINE I PARTITION p3 
VALUES LESS THAN (2005) ) ; +set global innodb_log_checkpoint_now = 1; + --disable_result_log exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --lock-ddl-per-table=1 --dbug=+d,check_mdl_lock_works; --enable_result_log DROP TABLE t; DROP TABLE `bobby``tables`; -rmdir $targetdir;
\ No newline at end of file +rmdir $targetdir; diff --git a/mysql-test/suite/mariabackup/mdev-14447.result b/mysql-test/suite/mariabackup/mdev-14447.result index 357e883178b..8f7a1a8708b 100644 --- a/mysql-test/suite/mariabackup/mdev-14447.result +++ b/mysql-test/suite/mariabackup/mdev-14447.result @@ -7,7 +7,6 @@ COMMIT; SELECT count(*) FROM t; count(*) 100000 -FOUND 1 /Checksum mismatch in datafile/ in backup.log # Prepare full backup, apply incremental one # Restore and check results # shutdown server diff --git a/mysql-test/suite/mariabackup/mdev-14447.test b/mysql-test/suite/mariabackup/mdev-14447.test index 7877a7805e1..b6998976e8c 100644 --- a/mysql-test/suite/mariabackup/mdev-14447.test +++ b/mysql-test/suite/mariabackup/mdev-14447.test @@ -18,15 +18,7 @@ INSERT INTO t select uuid(), uuid(), uuid(), uuid() from seq_1_to_100000; COMMIT; SELECT count(*) FROM t; -let $backuplog=$MYSQLTEST_VARDIR/tmp/backup.log; - -exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$incremental_dir --incremental-basedir=$basedir --dbug=+d,page_intermittent_checksum_mismatch 2> $backuplog; - ---let SEARCH_RANGE = 10000000 ---let SEARCH_PATTERN=Checksum mismatch in datafile ---let SEARCH_FILE=$backuplog ---source include/search_pattern_in_file.inc -remove_file $backuplog; +exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$incremental_dir --incremental-basedir=$basedir --dbug=+d,page_intermittent_checksum_mismatch; --disable_result_log echo # Prepare full backup, apply incremental one; diff --git a/mysql-test/suite/mariabackup/partial.result b/mysql-test/suite/mariabackup/partial.result index 8ccc8f6a6c7..981bef4e40c 100644 --- a/mysql-test/suite/mariabackup/partial.result +++ b/mysql-test/suite/mariabackup/partial.result @@ -4,8 +4,8 @@ CREATE TABLE t21(i INT) ENGINE INNODB; INSERT INTO t21 VALUES(1); CREATE TABLE t2(i int) ENGINE INNODB; # xtrabackup backup -t1.ibd -t21.ibd +t1.new +t21.new # xtrabackup prepare t1.cfg 
t21.cfg diff --git a/mysql-test/suite/mariabackup/partial.test b/mysql-test/suite/mariabackup/partial.test index 53388b1947f..d0d07daf2ea 100644 --- a/mysql-test/suite/mariabackup/partial.test +++ b/mysql-test/suite/mariabackup/partial.test @@ -17,6 +17,7 @@ let targetdir=$MYSQLTEST_VARDIR/tmp/backup; exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup "--tables=test.*1" --target-dir=$targetdir; --enable_result_log list_files $targetdir/test *.ibd; +list_files $targetdir/test *.new; # Inject a junk .ibd file into backup dir to # see if prepare does not choke on it. diff --git a/mysql-test/suite/mariabackup/partial_exclude.result b/mysql-test/suite/mariabackup/partial_exclude.result index 628613040e0..a31197b9e9d 100644 --- a/mysql-test/suite/mariabackup/partial_exclude.result +++ b/mysql-test/suite/mariabackup/partial_exclude.result @@ -9,7 +9,7 @@ USE db2; CREATE TABLE t1(i INT) ENGINE INNODB; USE test; # xtrabackup backup -t1.ibd +t1.new DROP TABLE t1; DROP TABLE t2; DROP DATABASE db2; diff --git a/mysql-test/suite/mariabackup/partial_exclude.test b/mysql-test/suite/mariabackup/partial_exclude.test index 99d14e58231..3642a2c6f46 100644 --- a/mysql-test/suite/mariabackup/partial_exclude.test +++ b/mysql-test/suite/mariabackup/partial_exclude.test @@ -27,6 +27,7 @@ exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup "--tables-ex --enable_result_log # check that only t1 table is in backup (t2 is excluded) +list_files $targetdir/test *.new; list_files $targetdir/test *.ibd; # check that db2 database is not in the backup (excluded) --error 1 diff --git a/mysql-test/suite/mariabackup/recreate_table_during_backup.result b/mysql-test/suite/mariabackup/recreate_table_during_backup.result index 821f9301ab6..3e01312cd3f 100644 --- a/mysql-test/suite/mariabackup/recreate_table_during_backup.result +++ b/mysql-test/suite/mariabackup/recreate_table_during_backup.result @@ -2,6 +2,7 @@ CREATE TABLE t1(i int) ENGINE=INNODB; CREATE TABLE t2(i int) 
ENGINE=INNODB; CREATE TABLE t3(a CHAR(36)) ENGINE INNODB; INSERT INTO t3 SELECT UUID() FROM seq_1_to_1000; +set global innodb_log_checkpoint_now=1; # xtrabackup backup # xtrabackup prepare # shutdown server diff --git a/mysql-test/suite/mariabackup/recreate_table_during_backup.test b/mysql-test/suite/mariabackup/recreate_table_during_backup.test index c3c9cf5aeef..1feb2c5c8b3 100644 --- a/mysql-test/suite/mariabackup/recreate_table_during_backup.test +++ b/mysql-test/suite/mariabackup/recreate_table_during_backup.test @@ -7,6 +7,7 @@ CREATE TABLE t2(i int) ENGINE=INNODB; CREATE TABLE t3(a CHAR(36)) ENGINE INNODB; INSERT INTO t3 SELECT UUID() FROM seq_1_to_1000; +set global innodb_log_checkpoint_now=1; # this will table and populate it, after backup has list of tables to be copied --let before_copy_test_t1=BEGIN NOT ATOMIC DROP TABLE test.t1;CREATE TABLE test.t1 ENGINE=INNODB SELECT UUID() from test.seq_1_to_100; END --let after_copy_test_t2=BEGIN NOT ATOMIC DROP TABLE test.t2;CREATE TABLE test.t2 ENGINE=INNODB SELECT UUID() from test.seq_1_to_1000; END diff --git a/mysql-test/suite/mariabackup/rename_during_backup.result b/mysql-test/suite/mariabackup/rename_during_backup.result index ba1dbec0e1b..e071b6b2e21 100644 --- a/mysql-test/suite/mariabackup/rename_during_backup.result +++ b/mysql-test/suite/mariabackup/rename_during_backup.result @@ -14,6 +14,7 @@ CREATE TABLE a1(a1 int) ENGINE INNODB; INSERT INTO a1 VALUES(1); CREATE TABLE b1(b1 CHAR(2)) ENGINE INNODB; INSERT INTO b1 VALUES('b1'); +set global innodb_log_checkpoint_now = 1; # xtrabackup prepare # shutdown server # remove datadir diff --git a/mysql-test/suite/mariabackup/rename_during_backup.test b/mysql-test/suite/mariabackup/rename_during_backup.test index 238a8b1985c..d8e40b28941 100644 --- a/mysql-test/suite/mariabackup/rename_during_backup.test +++ b/mysql-test/suite/mariabackup/rename_during_backup.test @@ -24,6 +24,8 @@ INSERT INTO a1 VALUES(1); CREATE TABLE b1(b1 CHAR(2)) ENGINE INNODB; INSERT INTO 
b1 VALUES('b1'); +set global innodb_log_checkpoint_now = 1; + # Test renames before of after copying tablespaces --let before_copy_test_t1=RENAME TABLE test.t1 TO test.t1_renamed --let after_copy_test_t2=RENAME TABLE test.t2 TO test.t2_renamed diff --git a/mysql-test/suite/mariabackup/rename_during_mdl_lock.result b/mysql-test/suite/mariabackup/rename_during_mdl_lock.result index 607460f4f05..074de33bb2f 100644 --- a/mysql-test/suite/mariabackup/rename_during_mdl_lock.result +++ b/mysql-test/suite/mariabackup/rename_during_mdl_lock.result @@ -1,4 +1,5 @@ CREATE TABLE t1(i int) ENGINE INNODB; +set global innodb_log_checkpoint_now = 1; # xtrabackup prepare # shutdown server # remove datadir diff --git a/mysql-test/suite/mariabackup/rename_during_mdl_lock.test b/mysql-test/suite/mariabackup/rename_during_mdl_lock.test index 6d22e0db4a7..212b7aabd69 100644 --- a/mysql-test/suite/mariabackup/rename_during_mdl_lock.test +++ b/mysql-test/suite/mariabackup/rename_during_mdl_lock.test @@ -2,6 +2,7 @@ let $targetdir=$MYSQLTEST_VARDIR/tmp/backup; mkdir $targetdir; CREATE TABLE t1(i int) ENGINE INNODB; +set global innodb_log_checkpoint_now = 1; exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --lock-ddl-per-table --dbug=+d,rename_during_mdl_lock_table; echo # xtrabackup prepare; diff --git a/mysql-test/suite/mariabackup/unsupported_redo.result b/mysql-test/suite/mariabackup/unsupported_redo.result index f2cfcc730e5..4ba40f5a916 100644 --- a/mysql-test/suite/mariabackup/unsupported_redo.result +++ b/mysql-test/suite/mariabackup/unsupported_redo.result @@ -22,8 +22,8 @@ CREATE TABLE t2(i int) ENGINE INNODB; ALTER TABLE t21 FORCE, ALGORITHM=INPLACE; # Create partial backup (excluding table t21), Ignore the # unsupported redo log for the table t21. 
-t1.ibd -t2.ibd +t1.new +t2.new # Prepare the full backup t1.ibd t2.ibd diff --git a/mysql-test/suite/mariabackup/unsupported_redo.test b/mysql-test/suite/mariabackup/unsupported_redo.test index decf8cab174..b02bcc3f695 100644 --- a/mysql-test/suite/mariabackup/unsupported_redo.test +++ b/mysql-test/suite/mariabackup/unsupported_redo.test @@ -60,6 +60,7 @@ ALTER TABLE t21 FORCE, ALGORITHM=INPLACE; exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup "--tables-exclude=test.t21" --target-dir=$targetdir; --enable_result_log --list_files $targetdir/test *.ibd +--list_files $targetdir/test *.new --echo # Prepare the full backup --disable_result_log diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index a16eb79dccd..f4e62c6e7f2 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -3258,25 +3258,12 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size, page_zip_set_size(&page.zip, zip_size); } -/** Initialize a page in the buffer pool. The page is usually not read -from a file even if it cannot be found in the buffer buf_pool. This is one -of the functions which perform to a block a state transition NOT_USED => -FILE_PAGE (the other is buf_page_get_gen). 
-@param[in,out] space space object -@param[in] offset offset of the tablespace -@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 -@param[in,out] mtr mini-transaction -@param[in,out] free_block pre-allocated buffer block -@return pointer to the block, page bufferfixed */ -buf_block_t* -buf_page_create(fil_space_t *space, uint32_t offset, - ulint zip_size, mtr_t *mtr, buf_block_t *free_block) +static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size, + mtr_t *mtr, buf_block_t *free_block) { - page_id_t page_id(space->id, offset); ut_ad(mtr->is_active()); ut_ad(page_id.space() != 0 || !zip_size); - space->free_page(offset, false); free_block->initialise(page_id, zip_size, 1); const ulint fold= page_id.fold(); @@ -3440,6 +3427,39 @@ loop: return block; } +/** Initialize a page in the buffer pool. The page is usually not read +from a file even if it cannot be found in the buffer buf_pool. This is one +of the functions which perform to a block a state transition NOT_USED => +FILE_PAGE (the other is buf_page_get_gen). 
+@param[in,out] space space object +@param[in] offset offset of the tablespace + or deferred space id if space + object is null +@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@param[in,out] mtr mini-transaction +@param[in,out] free_block pre-allocated buffer block +@return pointer to the block, page bufferfixed */ +buf_block_t* +buf_page_create(fil_space_t *space, uint32_t offset, + ulint zip_size, mtr_t *mtr, buf_block_t *free_block) +{ + space->free_page(offset, false); + return buf_page_create_low({space->id, offset}, zip_size, mtr, free_block); +} + +/** Initialize a page in buffer pool while initializing the +deferred tablespace +@param space_id space identfier +@param zip_size ROW_FORMAT=COMPRESSED page size or 0 +@param mtr mini-transaction +@param free_block pre-allocated buffer block +@return pointer to the block, page bufferfixed */ +buf_block_t* buf_page_create_deferred(uint32_t space_id, ulint zip_size, + mtr_t *mtr, buf_block_t *free_block) +{ + return buf_page_create_low({space_id, 0}, zip_size, mtr, free_block); +} + /** Monitor the buffer page read/write activity, and increment corresponding counter value in MONITOR_MODULE_BUF_PAGE. 
@param bpage buffer page whose read or write was completed diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 39e01cb67c6..e6fe9602f89 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2134,46 +2134,9 @@ err_exit: crypt_data->fill_page0(flags, page); } - if (ulint zip_size = fil_space_t::zip_size(flags)) { - page_zip_des_t page_zip; - page_zip_set_size(&page_zip, zip_size); - page_zip.data = page + srv_page_size; -#ifdef UNIV_DEBUG - page_zip.m_start = 0; -#endif /* UNIV_DEBUG */ - page_zip.m_end = 0; - page_zip.m_nonempty = 0; - page_zip.n_blobs = 0; - - buf_flush_init_for_writing(NULL, page, &page_zip, false); - - *err = os_file_write(IORequestWrite, path, file, - page_zip.data, 0, zip_size); - } else { - buf_flush_init_for_writing(NULL, page, NULL, - fil_space_t::full_crc32(flags)); - - *err = os_file_write(IORequestWrite, path, file, - page, 0, srv_page_size); - } - aligned_free(page); fil_space_t::name_type space_name; - if (*err != DB_SUCCESS) { - ib::error() - << "Could not write the first page to" - << " tablespace '" << path << "'"; - goto err_exit; - } - - if (!os_file_flush(file)) { - ib::error() << "File flush of tablespace '" - << path << "' failed"; - *err = DB_ERROR; - goto err_exit; - } - if (has_data_dir) { /* Make the ISL file if the IBD file is not in the default location. */ @@ -2657,15 +2620,23 @@ fil_ibd_load( } os_offset_t size; + bool deferred_space = false; /* Read and validate the first page of the tablespace. Assign a tablespace name based on the tablespace type. */ switch (file.validate_for_recovery()) { os_offset_t minimum_size; case DB_SUCCESS: + deferred_space = file.m_defer; + + if (deferred_space) { + goto tablespace_check; + } + if (file.space_id() != space_id) { return(FIL_LOAD_ID_CHANGED); } +tablespace_check: /* Get and test the file size. 
*/ size = os_file_get_size(file.handle()); @@ -2681,6 +2652,8 @@ fil_ibd_load( ib::error() << "Could not measure the size of" " single-table tablespace file '" << file.filepath() << "'"; + } else if (deferred_space) { + return FIL_LOAD_DEFER; } else if (size < minimum_size) { ib::error() << "The size of tablespace file '" << file.filepath() << "' is only " << size diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc index 77faf58edcf..5d49d14f65c 100644 --- a/storage/innobase/fsp/fsp0file.cc +++ b/storage/innobase/fsp/fsp0file.cc @@ -280,11 +280,8 @@ Datafile::read_first_page(bool read_only_mode) } else if (srv_operation == SRV_OPERATION_BACKUP) { break; } else { - - ib::error() - << "Cannot read first page of '" - << m_filepath << "' " - << err; + ib::error() << "Cannot read first page of '" + << m_filepath << "': " << err; break; } } @@ -424,6 +421,9 @@ Datafile::validate_for_recovery() " the first 64 pages."; return(err); } + if (m_space_id == ULINT_UNDEFINED) { + return DB_SUCCESS; /* empty file */ + } if (restore_from_doublewrite()) { return(DB_CORRUPTION); @@ -467,11 +467,18 @@ dberr_t Datafile::validate_first_page(lsn_t *flush_lsn) if (error_txt != NULL) { err_exit: + free_first_page(); + + if (recv_recovery_is_on() + || srv_operation == SRV_OPERATION_BACKUP) { + m_defer= true; + return DB_SUCCESS; + } + ib::info() << error_txt << " in datafile: " << m_filepath << ", Space ID:" << m_space_id << ", Flags: " << m_flags; m_is_valid = false; - free_first_page(); return(DB_CORRUPTION); } @@ -500,13 +507,18 @@ err_exit: ulint logical_size = fil_space_t::logical_size(m_flags); if (srv_page_size != logical_size) { + free_first_page(); + if (recv_recovery_is_on() + || srv_operation == SRV_OPERATION_BACKUP) { + m_defer= true; + return DB_SUCCESS; + } /* Logical size must be innodb_page_size. 
*/ ib::error() << "Data file '" << m_filepath << "' uses page size " << logical_size << ", but the innodb_page_size" " start-up parameter is " << srv_page_size; - free_first_page(); return(DB_ERROR); } @@ -535,10 +547,18 @@ err_exit: fil_node_t* node = UT_LIST_GET_FIRST(space->chain); if (node && !strcmp(m_filepath, node->name)) { +ok_exit: mysql_mutex_unlock(&fil_system.mutex); return DB_SUCCESS; } + if (!m_space_id + && (recv_recovery_is_on() + || srv_operation == SRV_OPERATION_BACKUP)) { + m_defer= true; + goto ok_exit; + } + /* Make sure the space_id has not already been opened. */ ib::error() << "Attempted to open a previously opened" " tablespace. Previous tablespace: " @@ -575,6 +595,10 @@ Datafile::find_space_id() file_size = os_file_get_size(m_handle); + if (!file_size) { + return DB_SUCCESS; + } + if (file_size == (os_offset_t) -1) { ib::error() << "Could not get file size of datafile '" << m_filepath << "'"; diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index fede1311cd1..c42977b5eda 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -314,6 +314,17 @@ buf_block_t* buf_page_create(fil_space_t *space, uint32_t offset, ulint zip_size, mtr_t *mtr, buf_block_t *free_block); +/** Initialize a page in buffer pool while initializing the +deferred tablespace +@param space_id space identfier +@param zip_size ROW_FORMAT=COMPRESSED page size or 0 +@param mtr mini-transaction +@param free_block pre-allocated buffer block +@return pointer to the block, page bufferfixed */ +buf_block_t* +buf_page_create_deferred(uint32_t space_id, ulint zip_size, mtr_t *mtr, + buf_block_t *free_block); + /********************************************************************//** Releases a compressed-only page acquired with buf_page_get_zip(). 
*/ UNIV_INLINE diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 679f3735392..18b6a91abe2 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -99,6 +99,7 @@ this must be equal to srv_page_size */ class page_id_t { public: + /** Constructor from (space, page_no). @param[in] space tablespace id @param[in] page_no page number */ @@ -152,6 +153,7 @@ public: } ulonglong raw() { return m_id; } + private: /** The page identifier */ uint64_t m_id; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index bd953566d23..225f05545c6 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -507,6 +507,8 @@ public: /** @return whether the storage device is rotational (HDD, not SSD) */ inline bool is_rotational() const; + inline bool is_deferred() const; + /** Open each file. Never invoked on .ibd files. @param create_new_db whether to skip the call to fil_node_t::read_page0() @return whether all files were opened */ @@ -1088,6 +1090,10 @@ struct fil_node_t final /** Filesystem block size */ ulint block_size; + /** Deferring the tablespace during recovery and it + can be used to skip the validation of page0 */ + bool deferred=false; + /** FIL_NODE_MAGIC_N */ ulint magic_n; @@ -1145,6 +1151,11 @@ inline bool fil_space_t::is_rotational() const return false; } +inline bool fil_space_t::is_deferred() const +{ + return UT_LIST_GET_FIRST(chain)->deferred; +} + /** Common InnoDB file extensions */ enum ib_extention { NO_EXT = 0, @@ -1473,8 +1484,13 @@ public: @retval NULL if this was the last */ fil_space_t* keyrotate_next(fil_space_t* space, bool recheck, bool encrypt); - /** Extend all open data files to the recovered size */ - ATTRIBUTE_COLD void extend_to_recv_size(); + /** Extend all open data files to the recovered size */ + ATTRIBUTE_COLD void extend_to_recv_size(); + + /** Determine if a tablespace associated with a file 
name exists. + @param path tablespace file name to look for + @return a matching tablespace */ + inline fil_space_t *find(const char *path) const; }; /** The tablespace memory cache. */ @@ -1684,7 +1700,9 @@ enum fil_load_status { /** The file(s) were not found */ FIL_LOAD_NOT_FOUND, /** The file(s) were not valid */ - FIL_LOAD_INVALID + FIL_LOAD_INVALID, + /** The tablespace file was deferred to open */ + FIL_LOAD_DEFER }; /** Open a single-file tablespace and add it to the InnoDB data structures. diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h index 517b3f72499..3ec56c75476 100644 --- a/storage/innobase/include/fsp0file.h +++ b/storage/innobase/include/fsp0file.h @@ -324,6 +324,9 @@ public: @return the first data page */ const byte* get_first_page() const { return(m_first_page); } + void set_space_id(ulint space_id) { m_space_id= space_id; } + + void set_flags(ulint flags) { m_flags = flags; } private: /** Free the filepath buffer. */ void free_filepath(); @@ -443,6 +446,8 @@ protected: ulint m_last_os_error; public: + /** true if table is deferred during recovery */ + bool m_defer=false; /** Use the following to determine the uniqueness of this datafile. */ #ifdef _WIN32 /* Use fields dwVolumeSerialNumber, nFileIndexLow, nFileIndexHigh. */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index b3cfb2a1914..46c09fecf7a 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -405,6 +405,15 @@ public: { return UNIV_UNLIKELY(recovery_on) ? 
recover_low(page_id) : nullptr; } + + /** Try to recover a tablespace that was not readable earlier + @param p iterator, initially pointing to page_id_t{space_id,0}; + the records will be freed and the iterator advanced + @param name tablespace file name + @param free_block spare buffer block + @return whether recovery failed */ + bool recover_deferred(map::iterator &p, const std::string &name, + buf_block_t *&free_block); }; /** The recovery system */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index bd181ddc249..933a0d6987c 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -587,6 +587,220 @@ static recv_spaces_t recv_spaces; /** The last parsed FILE_RENAME records */ static std::map<uint32_t,std::string> renamed_spaces; +/** Files for which fil_ibd_load() returned FIL_LOAD_DEFER */ +static struct +{ + /** Maintains the last opened defer file name along with lsn */ + struct item + { + /** Log sequence number of latest add() called by fil_name_process() */ + lsn_t lsn; + /** File name from the FILE_ record */ + std::string file_name; + }; + + using map= std::map<const uint32_t, item, std::less<const uint32_t>, + ut_allocator<std::pair<const uint32_t, item> > >; + + /** Map of defer tablespaces */ + map defers; + + /** Add the deferred space only if it is latest one + @param space space identifier + @param f_name file name + @param lsn log sequence number of the FILE_ record */ + void add(uint32_t space, const std::string &f_name, lsn_t lsn) + { + mysql_mutex_assert_owner(&recv_sys.mutex); + const char *filename= f_name.c_str(); + + if (srv_operation == SRV_OPERATION_RESTORE) + { + /* Replace absolute DATA DIRECTORY file paths with + short names relative to the backup directory. 
*/ + const char *name= strrchr(filename, '/'); +#ifdef _WIN32 + if (const char *last= strrchr(filename, '\\')) + if (last > name) + name= last; +#endif + if (name) + { + while (--name > filename && +#ifdef _WIN32 + *name != '\\' && +#endif + *name != '/'); + if (name > filename) + filename= name + 1; + } + } + + char *fil_path= fil_make_filepath(nullptr, {filename, strlen(filename)}, + IBD, false); + const item defer= {lsn, fil_path}; + auto p= defers.emplace(space, defer); + if (!p.second && p.first->second.lsn <= defer.lsn) + p.first->second= defer; + ut_free(fil_path); + } + + void remove(uint32_t space) + { + mysql_mutex_assert_owner(&recv_sys.mutex); + defers.erase(space); + } + + /** Look up a tablespace that was found corrupted during recovery. + @param id tablespace id + @return tablespace whose creation was deferred + @retval nullptr if no such tablespace was found */ + const item *find(uint32_t id) + { + mysql_mutex_assert_owner(&recv_sys.mutex); + auto it= defers.find(id); + if (it != defers.end()) + return &it->second; + return nullptr; + } + + void clear() + { + mysql_mutex_assert_owner(&recv_sys.mutex); + defers.clear(); + } + + /** Initialize all deferred tablespaces. + @return whether any deferred initialization failed */ + bool reinit_all() + { +retry: + bool fail= false; + buf_block_t *free_block= buf_LRU_get_free_block(false); + mysql_mutex_lock(&recv_sys.mutex); + + for (auto d= defers.begin(); d != defers.end(); ) + { + const uint32_t space_id{d->first}; + recv_sys_t::map::iterator p{recv_sys.pages.lower_bound({space_id,0})}; + + if (p == recv_sys.pages.end() || p->first.space() != space_id) + { + /* No pages were recovered. We create a dummy tablespace, + and let dict_drop_index_tree() delete the file. 
*/ + recv_spaces_t::iterator it{recv_spaces.find(space_id)}; + if (it != recv_spaces.end()) + create(it, d->second.file_name, static_cast<uint32_t> + (1U << FSP_FLAGS_FCRC32_POS_MARKER | + FSP_FLAGS_FCRC32_PAGE_SSIZE()), nullptr, 0); + } + else + fail= recv_sys.recover_deferred(p, d->second.file_name, free_block); + auto e= d++; + defers.erase(e); + if (fail) + break; + if (free_block) + continue; + mysql_mutex_unlock(&recv_sys.mutex); + goto retry; + } + + clear(); + mysql_mutex_unlock(&recv_sys.mutex); + if (free_block) + buf_pool.free_block(free_block); + return fail; + } + + /** Create tablespace metadata for a data file that was initially + found corrupted during recovery. + @param it tablespace iterator + @param name latest file name + @param flags FSP_SPACE_FLAGS + @param crypt_data encryption metadata + @param size tablespace size in pages + @return tablespace */ + static fil_space_t *create(const recv_spaces_t::const_iterator &it, + const std::string &name, uint32_t flags, + fil_space_crypt_t *crypt_data, uint32_t size) + { + fil_space_t *space= fil_space_t::create(it->first, flags, + FIL_TYPE_TABLESPACE, crypt_data); + ut_ad(space); + space->add(name.c_str(), OS_FILE_CLOSED, size, false, false); + space->recv_size= it->second.size; + space->size_in_header= size; + return space; + } +} +deferred_spaces; + +/** Try to recover a tablespace that was not readable earlier +@param p iterator, initially pointing to page_id_t{space_id,0}; + the records will be freed and the iterator advanced +@param name tablespace file name +@param free_block spare buffer block +@return whether recovery failed */ +bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p, + const std::string &name, + buf_block_t *&free_block) +{ + mysql_mutex_assert_owner(&mutex); + + const page_id_t first{p->first}; + ut_ad(first.space()); + + recv_spaces_t::iterator it{recv_spaces.find(first.space())}; + ut_ad(it != recv_spaces.end()); + + if (!first.page_no() && p->second.state == 
page_recv_t::RECV_WILL_NOT_READ) + { + mtr_t mtr; + buf_block_t *block= recover_low(first, p, mtr, free_block); + ut_ad(block == free_block); + free_block= nullptr; + + const byte *page= UNIV_LIKELY_NULL(block->page.zip.data) + ? block->page.zip.data + : block->frame; + const uint32_t space_id= mach_read_from_4(page + FIL_PAGE_SPACE_ID); + const uint32_t flags= fsp_header_get_flags(page); + const uint32_t page_no= mach_read_from_4(page + FIL_PAGE_OFFSET); + const uint32_t size= fsp_header_get_field(page, FSP_SIZE); + + ut_ad(it != recv_spaces.end()); + + if (page_id_t{space_id, page_no} == first && size >= 4 && + it != recv_spaces.end() && + fil_space_t::is_valid_flags(flags, space_id) && + fil_space_t::logical_size(flags) == srv_page_size) + { + fil_space_t *space= deferred_spaces.create(it, name, flags, + fil_space_read_crypt_data + (fil_space_t::zip_size(flags), + page), size); + space->free_limit= fsp_header_get_field(page, FSP_FREE_LIMIT); + space->free_len= flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page); + block->unfix(); + fil_node_t *node= UT_LIST_GET_FIRST(space->chain); + node->deferred= true; + if (space->acquire()) + { + node->deferred= false; + space->release(); + return false; + } + } + + block->unfix(); + } + + ib::error() << "Cannot apply log to " << first + << " of corrupted file '" << name << "'"; + return true; +} + /** Report an operation to create, delete, or rename a file during backup. 
@param[in] space_id tablespace identifier @param[in] create whether the file is being created @@ -790,10 +1004,14 @@ inline size_t recv_sys_t::files_size() @param[in,out] name file name @param[in] len length of the file name @param[in] space_id the tablespace ID -@param[in] deleted whether this is a FILE_DELETE record */ +@param[in] deleted whether this is a FILE_DELETE record +@param[in] lsn lsn of the redo log +@param[in] store whether the redo log has to + stored */ static void -fil_name_process(char* name, ulint len, ulint space_id, bool deleted) +fil_name_process(char* name, ulint len, ulint space_id, + bool deleted, lsn_t lsn, store_t *store) { if (srv_operation == SRV_OPERATION_BACKUP) { return; @@ -817,6 +1035,8 @@ fil_name_process(char* name, ulint len, ulint space_id, bool deleted) if (deleted) { /* Got FILE_DELETE */ + deferred_spaces.remove( + static_cast<uint32_t>(space_id)); if (!p.second && f.status != file_name_t::DELETED) { f.status = file_name_t::DELETED; if (f.space != NULL) { @@ -838,6 +1058,8 @@ fil_name_process(char* name, ulint len, ulint space_id, bool deleted) case FIL_LOAD_OK: ut_ad(space != NULL); + deferred_spaces.remove( + static_cast<uint32_t>(space_id)); if (!f.space) { if (f.size || f.flags != f.initial_flags) { @@ -885,6 +1107,15 @@ same_space: } break; + case FIL_LOAD_DEFER: + /** Skip the deferred spaces + when lsn is already processed */ + if (*store != store_t::STORE_IF_EXISTS) { + deferred_spaces.add( + static_cast<uint32_t>(space_id), + name, lsn); + } + break; case FIL_LOAD_INVALID: ut_ad(space == NULL); if (srv_force_recovery == 0) { @@ -931,6 +1162,7 @@ void recv_sys_t::close() dblwr.pages.clear(); ut_d(mysql_mutex_lock(&mutex)); clear(); + deferred_spaces.clear(); ut_d(mysql_mutex_unlock(&mutex)); if (buf) @@ -947,7 +1179,6 @@ void recv_sys_t::close() recv_spaces.clear(); renamed_spaces.clear(); mlog_init.clear(); - close_files(); } @@ -2090,7 +2321,7 @@ same_page: if (!size) continue; } - else + else if 
(!deferred_spaces.find(space_id)) continue; /* fall through */ case STORE_YES: @@ -2200,10 +2431,11 @@ same_page: const char saved_end= fn[rlen]; const_cast<char&>(fn[rlen])= '\0'; fil_name_process(const_cast<char*>(fn), fnend - fn, space_id, - (b & 0xf0) == FILE_DELETE); + (b & 0xf0) == FILE_DELETE, start_lsn, + store); if (fn2) fil_name_process(const_cast<char*>(fn2), fn2end - fn2, space_id, - false); + false, start_lsn, store); if ((b & 0xf0) < FILE_MODIFY && log_file_op) log_file_op(space_id, (b & 0xf0) == FILE_CREATE, l, static_cast<ulint>(fnend - fn), @@ -2590,39 +2822,60 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, buf_block_t* block= nullptr; mlog_init_t::init &i= mlog_init.last(page_id); const lsn_t end_lsn = recs.log.last()->lsn; + bool first_page= page_id.page_no() == 0; if (end_lsn < i.lsn) DBUG_LOG("ib_log", "skip log for page " << page_id << " LSN " << end_lsn << " < " << i.lsn); - else if (fil_space_t *space= fil_space_t::get(page_id.space())) + fil_space_t *space= fil_space_t::get(page_id.space()); + + if (!space && !first_page) + return block; + + mtr.start(); + mtr.set_log_mode(MTR_LOG_NO_REDO); + + ulint zip_size= space ? space->zip_size() : 0; + + if (!space) { - mtr.start(); - mtr.set_log_mode(MTR_LOG_NO_REDO); - block= buf_page_create(space, page_id.page_no(), space->zip_size(), &mtr, - b); - if (UNIV_UNLIKELY(block != b)) - { - /* The page happened to exist in the buffer pool, or it was just - being read in. Before buf_page_get_with_no_latch() returned to - buf_page_create(), all changes must have been applied to the - page already. 
*/ - ut_ad(pages.find(page_id) == pages.end()); - mtr.commit(); - block= nullptr; - } - else - { - ut_ad(&recs == &pages.find(page_id)->second); - i.created= true; - recv_recover_page(block, mtr, p, space, &i); - ut_ad(mtr.has_committed()); - recs.log.clear(); - map::iterator r= p++; - pages.erase(r); - if (pages.empty()) - pthread_cond_signal(&cond); - } - space->release(); + auto it= recv_spaces.find(page_id.space()); + ut_ad(it != recv_spaces.end()); + uint32_t flags= it->second.flags; + zip_size= fil_space_t::zip_size(flags); + block= buf_page_create_deferred(page_id.space(), zip_size, &mtr, b); + } + else + block= buf_page_create(space, page_id.page_no(), zip_size, &mtr, b); + + if (UNIV_UNLIKELY(block != b)) + { + /* The page happened to exist in the buffer pool, or it + was just being read in. Before buf_page_get_with_no_latch() + returned to buf_page_create(), all changes must have been + applied to the page already. */ + ut_ad(pages.find(page_id) == pages.end()); + mtr.commit(); + block= nullptr; } + else + { + /* Buffer fix the first page while deferring the tablespace + and unfix it after creating defer tablespace */ + if (first_page && !space) + block->fix(); + ut_ad(&recs == &pages.find(page_id)->second); + i.created= true; + recv_recover_page(block, mtr, p, space, &i); + ut_ad(mtr.has_committed()); + recs.log.clear(); + map::iterator r= p++; + pages.erase(r); + if (pages.empty()) + pthread_cond_signal(&cond); + } + + if (space) + space->release(); return block; } @@ -2651,6 +2904,15 @@ buf_block_t *recv_sys_t::recover_low(const page_id_t page_id) return block; } +inline fil_space_t *fil_system_t::find(const char *path) const +{ + mysql_mutex_assert_owner(&mutex); + for (fil_space_t &space : fil_system.space_list) + if (space.chain.start && !strcmp(space.chain.start->name, path)) + return &space; + return nullptr; +} + /** Apply buffered log to persistent data pages. 
@param last_batch whether it is possible to write more redo log */ void recv_sys_t::apply(bool last_batch) @@ -2722,6 +2984,28 @@ void recv_sys_t::apply(bool last_batch) page_recv_t &recs= p->second; ut_ad(!recs.log.empty()); + const uint32_t space_id= page_id.space(); + auto d= deferred_spaces.defers.find(space_id); + if (d != deferred_spaces.defers.end()) + { + if (recover_deferred(p, d->second.file_name, free_block)) + { + if (!srv_force_recovery) + set_corrupt_fs(); + while (p != pages.end() && p->first.space() == space_id) + { + map::iterator r= p++; + r->second.log.clear(); + pages.erase(r); + } + } + deferred_spaces.defers.erase(d); + if (!free_block) + goto next_free_block; + p= pages.lower_bound(page_id); + continue; + } + switch (recs.state) { case page_recv_t::RECV_BEING_READ: case page_recv_t::RECV_BEING_PROCESSED: @@ -2730,6 +3014,7 @@ void recv_sys_t::apply(bool last_batch) case page_recv_t::RECV_WILL_NOT_READ: if (UNIV_LIKELY(!!recover_low(page_id, p, mtr, free_block))) { +next_free_block: mysql_mutex_unlock(&mutex); free_block= buf_LRU_get_free_block(false); mysql_mutex_lock(&mutex); @@ -2824,48 +3109,6 @@ next_page: buf_pool_invalidate(); mysql_mutex_lock(&log_sys.mutex); } -#if 1 /* Mariabackup FIXME: Remove or adjust rename_table_in_prepare() */ - else if (srv_operation != SRV_OPERATION_NORMAL); -#endif - else - { - /* In the last batch, we will apply any rename operations. */ - for (auto r : renamed_spaces) - { - const uint32_t id= r.first; - fil_space_t *space= fil_space_t::get(id); - if (!space) - continue; - ut_ad(UT_LIST_GET_LEN(space->chain) == 1); - const char *old= space->chain.start->name; - if (r.second != old) - { - bool exists; - os_file_type_t ftype; - const char *new_name= r.second.c_str(); - if (!os_file_status(new_name, &exists, &ftype) || exists) - { - ib::error() << "Cannot replay rename of tablespace " << id - << " from '" << old << "' to '" << r.second << - (exists ? 
"' because the target file exists" : "'"); - found_corrupt_fs= true; - } - else - { - mysql_mutex_lock(&log_sys.mutex); - if (dberr_t err= space->rename(r.second.c_str(), false)) - { - ib::error() << "Cannot replay rename of tablespace " << id - << " to '" << r.second << "': " << err; - found_corrupt_fs= true; - } - mysql_mutex_unlock(&log_sys.mutex); - } - } - space->release(); - } - renamed_spaces.clear(); - } mysql_mutex_lock(&mutex); @@ -3307,6 +3550,12 @@ next: recv_spaces_t::iterator i = recv_spaces.find(space); ut_ad(i != recv_spaces.end()); + if (deferred_spaces.find(static_cast<uint32_t>(space))) { + /* Skip redo logs belonging to + incomplete tablespaces */ + goto next; + } + switch (i->second.status) { case file_name_t::NORMAL: goto next; @@ -3337,6 +3586,10 @@ func_exit: continue; } + if (deferred_spaces.find(static_cast<uint32_t>(rs.first))) { + continue; + } + missing_tablespace = true; if (srv_force_recovery > 0) { @@ -3422,6 +3675,77 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) return DB_SUCCESS; } +/** Apply any FILE_RENAME records */ +static dberr_t recv_rename_files() +{ + mysql_mutex_assert_owner(&recv_sys.mutex); + mysql_mutex_assert_owner(&log_sys.mutex); + + dberr_t err= DB_SUCCESS; + + for (const auto &r : renamed_spaces) + { + const uint32_t id= r.first; + fil_space_t *space= fil_space_t::get(id); + if (!space) + continue; + ut_ad(UT_LIST_GET_LEN(space->chain) == 1); + char *old= space->chain.start->name; + if (r.second != old) + { + bool exists; + os_file_type_t ftype; + const char *new_name= r.second.c_str(); + mysql_mutex_lock(&fil_system.mutex); + const fil_space_t *other= nullptr; + if (!space->chain.start->is_open() && space->chain.start->deferred && + (other= fil_system.find(new_name)) && + (other->chain.start->is_open() || !other->chain.start->deferred)) + other= nullptr; + + if (other) + { + /* Multiple tablespaces use the same file name. 
This should + only be possible if the recovery of both files was deferred + (no valid page 0 is contained in either file). We shall not + rename the file, just rename the metadata. */ + ib::info() << "Renaming tablespace metadata " << id + << " from '" << old << "' to '" << r.second + << "' that is also associated with tablespace " + << other->id; + space->chain.start->name= mem_strdup(new_name); + ut_free(old); + } + else if (!os_file_status(new_name, &exists, &ftype) || exists) + { + ib::error() << "Cannot replay rename of tablespace " << id + << " from '" << old << "' to '" << r.second << + (exists ? "' because the target file exists" : "'"); + err= DB_TABLESPACE_EXISTS; + } + else + { + mysql_mutex_unlock(&fil_system.mutex); + err= space->rename(new_name, false); + if (err != DB_SUCCESS) + ib::error() << "Cannot replay rename of tablespace " << id + << " to '" << r.second << "': " << err; + goto done; + } + mysql_mutex_unlock(&fil_system.mutex); + } +done: + space->release(); + if (err != DB_SUCCESS) + { + recv_sys.set_corrupt_fs(); + break; + } + } + renamed_spaces.clear(); + return err; +} + /** Start recovering from a redo log checkpoint. @param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN of first system tablespace page @@ -3722,6 +4046,9 @@ completed: recv_no_ibuf_operations = false; ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE || srv_operation == SRV_OPERATION_RESTORE_EXPORT); + if (srv_operation == SRV_OPERATION_NORMAL) { + err = recv_rename_files(); + } mysql_mutex_unlock(&recv_sys.mutex); mysql_mutex_unlock(&log_sys.mutex); @@ -3730,8 +4057,12 @@ completed: /* The database is now ready to start almost normal processing of user transactions: transaction rollbacks and the application of the log records in the hash table can be run in background. 
*/ + if (err == DB_SUCCESS && deferred_spaces.reinit_all() + && !srv_force_recovery) { + err = DB_CORRUPTION; + } - return(DB_SUCCESS); + return err; } bool recv_dblwr_t::validate_page(const page_id_t page_id, diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 97e197201f2..74cf4d8d427 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -4462,110 +4462,111 @@ void fil_node_t::find_metadata(os_file_t file @return whether the page was found valid */ bool fil_node_t::read_page0() { - mysql_mutex_assert_owner(&fil_system.mutex); - const unsigned psize = space->physical_size(); + mysql_mutex_assert_owner(&fil_system.mutex); + const unsigned psize= space->physical_size(); #ifndef _WIN32 - struct stat statbuf; - if (fstat(handle, &statbuf)) { - return false; - } - os_offset_t size_bytes = statbuf.st_size; + struct stat statbuf; + if (fstat(handle, &statbuf)) + return false; + os_offset_t size_bytes= statbuf.st_size; #else - os_offset_t size_bytes = os_file_get_size(handle); - ut_a(size_bytes != (os_offset_t) -1); + os_offset_t size_bytes= os_file_get_size(handle); + ut_a(size_bytes != (os_offset_t) -1); #endif - const uint32_t min_size = FIL_IBD_FILE_INITIAL_SIZE * psize; + const uint32_t min_size= FIL_IBD_FILE_INITIAL_SIZE * psize; - if (size_bytes < min_size) { - ib::error() << "The size of the file " << name - << " is only " << size_bytes - << " bytes, should be at least " << min_size; - return false; - } + if (size_bytes < min_size) + { + ib::error() << "The size of the file " << name + << " is only " << size_bytes + << " bytes, should be at least " << min_size; + return false; + } - page_t *page= static_cast<byte*>(aligned_malloc(psize, psize)); - if (os_file_read(IORequestRead, handle, page, 0, psize) - != DB_SUCCESS) { - ib::error() << "Unable to read first page of file " << name; + if (!deferred) + { + page_t *page= static_cast<byte*>(aligned_malloc(psize, psize)); + if (os_file_read(IORequestRead, 
handle, page, 0, psize) + != DB_SUCCESS) + { + ib::error() << "Unable to read first page of file " << name; corrupted: - aligned_free(page); - return false; - } + aligned_free(page); + return false; + } - const ulint space_id = memcmp_aligned<2>( - FIL_PAGE_SPACE_ID + page, - FSP_HEADER_OFFSET + FSP_SPACE_ID + page, 4) - ? ULINT_UNDEFINED - : mach_read_from_4(FIL_PAGE_SPACE_ID + page); - ulint flags = fsp_header_get_flags(page); - const uint32_t size = fsp_header_get_field(page, FSP_SIZE); - const uint32_t free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT); - const uint32_t free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE - + page); - if (!fil_space_t::is_valid_flags(flags, space->id)) { - ulint cflags = fsp_flags_convert_from_101(flags); - if (cflags == ULINT_UNDEFINED) { + const ulint space_id= memcmp_aligned<2> + (FIL_PAGE_SPACE_ID + page, + FSP_HEADER_OFFSET + FSP_SPACE_ID + page, 4) + ? ULINT_UNDEFINED + : mach_read_from_4(FIL_PAGE_SPACE_ID + page); + ulint flags= fsp_header_get_flags(page); + const uint32_t size= fsp_header_get_field(page, FSP_SIZE); + const uint32_t free_limit= fsp_header_get_field(page, FSP_FREE_LIMIT); + const uint32_t free_len= flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page); + if (!fil_space_t::is_valid_flags(flags, space->id)) + { + ulint cflags= fsp_flags_convert_from_101(flags); + if (cflags == ULINT_UNDEFINED) + { invalid: - ib::error() - << "Expected tablespace flags " - << ib::hex(space->flags) - << " but found " << ib::hex(flags) - << " in the file " << name; - goto corrupted; - } + ib::error() << "Expected tablespace flags " + << ib::hex(space->flags) + << " but found " << ib::hex(flags) + << " in the file " << name; + goto corrupted; + } - ulint cf = cflags & ~FSP_FLAGS_MEM_MASK; - ulint sf = space->flags & ~FSP_FLAGS_MEM_MASK; + ulint cf= cflags & ~FSP_FLAGS_MEM_MASK; + ulint sf= space->flags & ~FSP_FLAGS_MEM_MASK; - if (!fil_space_t::is_flags_equal(cf, sf) - && !fil_space_t::is_flags_equal(sf, cf)) { - goto invalid; 
- } + if (!fil_space_t::is_flags_equal(cf, sf) && + !fil_space_t::is_flags_equal(sf, cf)) + goto invalid; + flags= cflags; + } - flags = cflags; - } + ut_ad(!(flags & FSP_FLAGS_MEM_MASK)); - ut_ad(!(flags & FSP_FLAGS_MEM_MASK)); + /* Try to read crypt_data from page 0 if it is not yet read. */ + if (!space->crypt_data) + space->crypt_data= fil_space_read_crypt_data( + fil_space_t::zip_size(flags), page); + aligned_free(page); - /* Try to read crypt_data from page 0 if it is not yet read. */ - if (!space->crypt_data) { - space->crypt_data = fil_space_read_crypt_data( - fil_space_t::zip_size(flags), page); - } - aligned_free(page); + if (UNIV_UNLIKELY(space_id != space->id)) + { + ib::error() << "Expected tablespace id " << space->id + << " but found " << space_id + << " in the file " << name; + return false; + } - if (UNIV_UNLIKELY(space_id != space->id)) { - ib::error() << "Expected tablespace id " << space->id - << " but found " << space_id - << " in the file " << name; - return false; - } + space->flags= (space->flags & FSP_FLAGS_MEM_MASK) | flags; + ut_ad(space->free_limit == 0 || space->free_limit == free_limit); + ut_ad(space->free_len == 0 || space->free_len == free_len); + space->size_in_header= size; + space->free_limit= free_limit; + space->free_len= free_len; + } #ifdef UNIV_LINUX - find_metadata(handle, &statbuf); + find_metadata(handle, &statbuf); #else - find_metadata(); + find_metadata(); #endif - /* Truncate the size to a multiple of extent size. */ - ulint mask = psize * FSP_EXTENT_SIZE - 1; + /* Truncate the size to a multiple of extent size. */ + ulint mask= psize * FSP_EXTENT_SIZE - 1; - if (size_bytes <= mask) { - /* .ibd files start smaller than an - extent size. Do not truncate valid data. */ - } else { - size_bytes &= ~os_offset_t(mask); - } + if (size_bytes <= mask); + /* .ibd files start smaller than an + extent size. Do not truncate valid data. 
*/ + else size_bytes &= ~os_offset_t(mask); - space->flags = (space->flags & FSP_FLAGS_MEM_MASK) | flags; - - space->punch_hole = space->is_compressed(); - this->size = uint32_t(size_bytes / psize); - space->set_sizes(this->size); - ut_ad(space->free_limit == 0 || space->free_limit == free_limit); - ut_ad(space->free_len == 0 || space->free_len == free_len); - space->size_in_header = size; - space->free_limit = free_limit; - space->free_len = free_len; - return true; + space->punch_hole= space->is_compressed(); + this->size= uint32_t(size_bytes / psize); + space->set_sizes(this->size); + return true; } + #endif /* !UNIV_INNOCHECKSUM */ |