diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2019-04-17 15:26:17 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2019-04-17 15:26:17 +0300 |
commit | 250799f961b8f261ae2922382e2c69bca7c3b6fa (patch) | |
tree | 2ac2b7b9864254188d9ac426fc878e511224477d | |
parent | a2335b791aa06fbf4b784486ed9d34ba75af9d51 (diff) | |
parent | 169c00994b5c7d94576fea88b2e84a8833511e96 (diff) | |
download | mariadb-git-250799f961b8f261ae2922382e2c69bca7c3b6fa.tar.gz |
Merge 10.2 into 10.3
-rw-r--r-- | mysql-test/suite/encryption/r/corrupted_during_recovery.result | 22 | ||||
-rw-r--r-- | mysql-test/suite/encryption/t/corrupted_during_recovery.test | 60 | ||||
-rw-r--r-- | mysql-test/suite/innodb/r/corrupted_during_recovery.result | 22 | ||||
-rw-r--r-- | mysql-test/suite/innodb/r/log_alter_table.result | 8 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/corrupted_during_recovery.opt | 1 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/corrupted_during_recovery.test | 60 | ||||
-rw-r--r-- | mysql-test/suite/innodb/t/log_alter_table.test | 8 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 15 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 2 | ||||
-rw-r--r-- | storage/innobase/ibuf/ibuf0ibuf.cc | 5 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 9 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/fsp0types.h | 9 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 284 | ||||
-rw-r--r-- | storage/innobase/row/row0merge.cc | 41 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 3 |
16 files changed, 495 insertions, 57 deletions
diff --git a/mysql-test/suite/encryption/r/corrupted_during_recovery.result b/mysql-test/suite/encryption/r/corrupted_during_recovery.result new file mode 100644 index 00000000000..41c0d7d75a8 --- /dev/null +++ b/mysql-test/suite/encryption/r/corrupted_during_recovery.result @@ -0,0 +1,22 @@ +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(1); +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(2); +# Kill the server +# Corrupt the pages +SELECT * FROM t1; +ERROR 42000: Unknown storage engine 'InnoDB' +SELECT * FROM t1; +a +1 +2 +SELECT * FROM t2; +a +2 +CHECK TABLE t1,t2; +Table Op Msg_type Msg_text +test.t1 check status OK +test.t2 check status OK +DROP TABLE t1, t2; diff --git a/mysql-test/suite/encryption/t/corrupted_during_recovery.test b/mysql-test/suite/encryption/t/corrupted_during_recovery.test new file mode 100644 index 00000000000..5784d5775c6 --- /dev/null +++ b/mysql-test/suite/encryption/t/corrupted_during_recovery.test @@ -0,0 +1,60 @@ +--source include/have_innodb.inc +--source include/have_file_key_management_plugin.inc + +--disable_query_log +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page"); +call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Table is encrypted but decrypt failed"); +call mtr.add_suppression("InnoDB: The page \\[page id: space=\\d+, page number=3\\] in file '.*test.t1\\.ibd' cannot be decrypted"); +call mtr.add_suppression("InnoDB: Table in tablespace \\d+ encrypted. However key management plugin or used key_version \\d+ is not found or used encryption algorithm or method does not match. Can't continue opening the table."); +--enable_query_log + +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(1); +# Force a redo log checkpoint. +--source include/restart_mysqld.inc +--source ../../suite/innodb/include/no_checkpoint_start.inc +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(2); + +--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2; +--source ../../suite/innodb/include/no_checkpoint_end.inc + +--echo # Corrupt the pages + +perl; +my $ps = $ENV{INNODB_PAGE_SIZE}; + +my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +seek (FILE, $ENV{INNODB_PAGE_SIZE} * 3, SEEK_SET) or die "seek"; +print FILE "junk"; +close FILE or die "close"; + +$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us! +# Unfortunately, we are not immune to page 0 corruption. +seek (FILE, $ps, SEEK_SET) or die "seek"; +print FILE chr(0xff) x ($ps * 3); +close FILE or die "close"; +EOF + +--source include/start_mysqld.inc +--error ER_UNKNOWN_STORAGE_ENGINE +SELECT * FROM t1; +let $restart_parameters=--innodb_force_recovery=1; +--source include/restart_mysqld.inc + +SELECT * FROM t1; +SELECT * FROM t2; +CHECK TABLE t1,t2; + +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/r/corrupted_during_recovery.result b/mysql-test/suite/innodb/r/corrupted_during_recovery.result new file mode 100644 index 00000000000..788f17e3284 --- /dev/null +++ b/mysql-test/suite/innodb/r/corrupted_during_recovery.result @@ -0,0 +1,22 @@ +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(1); +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(1); +# Kill the server +# Corrupt the pages +SELECT * FROM t1; +ERROR 42000: Unknown storage engine 'InnoDB' +SELECT * FROM t1; +a +0 +2 +SELECT * FROM t2; +a +1 +CHECK TABLE t1,t2; +Table Op Msg_type Msg_text +test.t1 check status OK +test.t2 check status OK +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/r/log_alter_table.result b/mysql-test/suite/innodb/r/log_alter_table.result index a6f35543c04..9cb9ed77e33 100644 --- a/mysql-test/suite/innodb/r/log_alter_table.result +++ b/mysql-test/suite/innodb/r/log_alter_table.result @@ -7,11 +7,11 @@ # CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB; INSERT INTO t1 VALUES (1,2); -ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE; -ALTER TABLE t1 DROP INDEX b, ADD INDEX (b); +ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE; +ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED; # Kill the server -FOUND 1 /scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0/ in mysqld.1.err -FOUND 1 /scan .*: log rec MLOG_INDEX_LOAD/ in mysqld.1.err +FOUND 2 /scan \d+: multi-log rec MLOG_FILE_CREATE2 len \d+ page \d+:0/ in mysqld.1.err +FOUND 3 /scan \d+: log rec MLOG_INDEX_LOAD/ in mysqld.1.err CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.opt b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt new file mode 100644 index 00000000000..6051f4cd1fa --- /dev/null +++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt @@ -0,0 +1 @@ +--innodb_doublewrite=0 diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.test b/mysql-test/suite/innodb/t/corrupted_during_recovery.test new file mode 100644 index 00000000000..fbfb1bbe5d5 --- /dev/null +++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.test @@ -0,0 +1,60 @@ +--source include/have_innodb.inc + +--disable_query_log +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page"); +call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Page read from tablespace is corrupted."); +--enable_query_log + +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(1); +# Force a redo log checkpoint. +--source include/restart_mysqld.inc +--source ../include/no_checkpoint_start.inc +CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES(2); +SET GLOBAL innodb_flush_log_at_trx_commit=1; +INSERT INTO t2 VALUES(1); + +--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2; +--source ../include/no_checkpoint_end.inc + +--echo # Corrupt the pages + +perl; +my $ps = $ENV{INNODB_PAGE_SIZE}; + +my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n"; +die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps; +# Replace the a=1 with a=0. +$page =~ s/\x80\x0\x0\x0\x0\x0\x0\x1/\x80\x0\x0\x0\x0\x0\x0\x0/; +sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n"; +syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n"; +close FILE or die "close"; + +$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd"; +open(FILE, "+<$file") || die "Unable to open $file"; +binmode FILE; +# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us! +# Unfortunately, we are not immune to page 0 corruption. +seek (FILE, $ps, SEEK_SET) or die "seek"; +print FILE chr(0xff) x ($ps * 3); +close FILE or die "close"; +EOF + +--source include/start_mysqld.inc +--error ER_UNKNOWN_STORAGE_ENGINE +SELECT * FROM t1; +let $restart_parameters=--innodb_force_recovery=1; +--source include/restart_mysqld.inc +SELECT * FROM t1; +SELECT * FROM t2; +CHECK TABLE t1,t2; + +DROP TABLE t1, t2; diff --git a/mysql-test/suite/innodb/t/log_alter_table.test b/mysql-test/suite/innodb/t/log_alter_table.test index 6f12dfaf0b9..c92953f16a1 100644 --- a/mysql-test/suite/innodb/t/log_alter_table.test +++ b/mysql-test/suite/innodb/t/log_alter_table.test @@ -19,9 +19,9 @@ CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB; # MLOG_INDEX_LOAD will not be emitted for empty tables. Insert a row. INSERT INTO t1 VALUES (1,2); # We should get two MLOG_INDEX_LOAD for this. -ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE; +ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE; # And one MLOG_INDEX_LOAD for this. -ALTER TABLE t1 DROP INDEX b, ADD INDEX (b); +ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED; --let CLEANUP_IF_CHECKPOINT=DROP TABLE t1; --source include/no_checkpoint_end.inc @@ -32,10 +32,10 @@ ALTER TABLE t1 DROP INDEX b, ADD INDEX (b); let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; let SEARCH_ABORT=NOT FOUND; # ensure that we have exactly 2 records there. -let SEARCH_PATTERN=scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0; +let SEARCH_PATTERN=scan \d+: multi-log rec MLOG_FILE_CREATE2 len \d+ page \d+:0; --source include/search_pattern_in_file.inc # ensure that we have exactly 3 records there. -let SEARCH_PATTERN=scan .*: log rec MLOG_INDEX_LOAD; +let SEARCH_PATTERN=scan \d+: log rec MLOG_INDEX_LOAD; --source include/search_pattern_in_file.inc CHECK TABLE t1; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index b709fe643fa..d5d1dcea152 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -5612,7 +5612,15 @@ buf_page_create( buf_block_free(free_block); - return(buf_page_get_with_no_latch(page_id, page_size, mtr)); + if (!recv_recovery_is_on()) { + return buf_page_get_with_no_latch(page_id, page_size, + mtr); + } + + mutex_exit(&recv_sys->mutex); + block = buf_page_get_with_no_latch(page_id, page_size, mtr); + mutex_enter(&recv_sys->mutex); + return block; } /* If we get here, the page was not in buf_pool: init it there */ @@ -5678,7 +5686,9 @@ buf_page_create( /* Delete possible entries for the page from the insert buffer: such can exist if the page belonged to an index which was dropped */ - ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE); + if (!recv_recovery_is_on()) { + ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE); + } frame = block->frame; @@ -5693,6 +5703,7 @@ buf_page_create( (3) key_version on encrypted pages (not page 0:0) */ memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); + memset(frame + FIL_PAGE_LSN, 0, 8); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 8db3b72cee6..11191142882 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4033,7 +4033,7 @@ static int innodb_init_params() if (innobase_open_files > open_files_limit) { ib::warn() << "innodb_open_files " << innobase_open_files << " should not be greater" - << "than the open_files_limit " << open_files_limit; + << " than the open_files_limit " << open_files_limit; if (innobase_open_files > tc_size) { innobase_open_files = tc_size; } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index e3c61ca29cc..e8d089ee41b 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2018, MariaDB Corporation. +Copyright (c) 2016, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4433,7 +4433,8 @@ ibuf_merge_or_delete_for_page( ulint dops[IBUF_OP_COUNT]; ut_ad(block == NULL || page_id == block->page.id); - ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ); + ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ + || recv_recovery_is_on()); if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE || trx_sys_hdr_page(page_id) diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 401d05b6e5a..5c6360823bf 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -235,6 +235,15 @@ public: } bool operator!=(const page_id_t& rhs) const { return !(*this == rhs); } + bool operator<(const page_id_t& rhs) const + { + if (m_space == rhs.m_space) { + return m_page_no < rhs.m_page_no; + } + + return m_space < rhs.m_space; + } + /** Retrieve the tablespace id. @return tablespace id */ uint32_t space() const { return m_space; } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 4ae303e9321..14eacc54b24 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -84,6 +84,9 @@ struct fil_space_t { Protected by log_sys.mutex. If and only if this is nonzero, the tablespace will be in named_spaces. */ + /** Log sequence number of the latest MLOG_INDEX_LOAD record + that was found while parsing the redo log */ + lsn_t enable_lsn; bool stop_new_ops; /*!< we set this true when we start deleting a single-table tablespace. diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index 642bd20e67e..0f965261ca8 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. +Copyright (c) 2014, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -255,8 +255,7 @@ or have been introduced in MySQL 5.7 or 8.0: ===================================================================== The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS: ===================================================================== - 25: DATA_DIR - 26..27: ATOMIC_WRITES + 27: DATA_DIR 28..31: COMPRESSION_LEVEL */ @@ -264,9 +263,9 @@ The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS: #define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR) /** Zero relative shift position of the DATA_DIR flag */ -#define FSP_FLAGS_MEM_DATA_DIR 25 +#define FSP_FLAGS_MEM_DATA_DIR 27 /** Zero relative shift position of the COMPRESSION_LEVEL field */ -#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 26 +#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28 /** Zero relative shift position of the POST_ANTELOPE field */ #define FSP_FLAGS_POS_POST_ANTELOPE 0 diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 9930fdeae03..f941c522dcc 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -155,10 +155,22 @@ struct file_name_t { /** FSP_SIZE of tablespace */ ulint size; + /** the log sequence number of the last observed MLOG_INDEX_LOAD + record for the tablespace */ + lsn_t enable_lsn; + /** Constructor */ file_name_t(std::string name_, bool deleted) : name(name_), space(NULL), status(deleted ? DELETED: NORMAL), - size(0) {} + size(0), enable_lsn(0) {} + + /** Report a MLOG_INDEX_LOAD operation, meaning that + mlog_init for any earlier LSN must be skipped. + @param lsn log sequence number of the MLOG_INDEX_LOAD */ + void mlog_index_load(lsn_t lsn) + { + if (enable_lsn < lsn) enable_lsn = lsn; + } }; /** Map of dirty tablespaces during recovery */ @@ -174,6 +186,8 @@ static recv_spaces_t recv_spaces; enum recv_addr_state { /** not yet processed */ RECV_NOT_PROCESSED, + /** not processed; the page will be reinitialized */ + RECV_WILL_NOT_READ, /** page is being read */ RECV_BEING_READ, /** log records are being applied on the page */ @@ -220,6 +234,117 @@ void (*log_file_op)(ulint space_id, const byte* flags, const byte* name, ulint len, const byte* new_name, ulint new_len); +/** Information about initializing page contents during redo log processing */ +class mlog_init_t +{ +public: + /** A page initialization operation that was parsed from + the redo log */ + struct init { + /** log sequence number of the page initialization */ + lsn_t lsn; + /** Whether btr_page_create() avoided a read of the page. + + At the end of the last recovery batch, ibuf_merge() + will invoke change buffer merge for pages that reside + in the buffer pool. (In the last batch, loading pages + would trigger change buffer merge.) */ + bool created; + }; + +private: + typedef std::map<const page_id_t, init, + std::less<const page_id_t>, + ut_allocator<std::pair<const page_id_t, init> > > + map; + /** Map of page initialization operations. + FIXME: Merge this to recv_sys->addr_hash! */ + map inits; +public: + /** Record that a page will be initialized by the redo log. + @param[in] space tablespace identifier + @param[in] page_no page number + @param[in] lsn log sequence number */ + void add(ulint space, ulint page_no, lsn_t lsn) + { + ut_ad(mutex_own(&recv_sys->mutex)); + const init init = { lsn, false }; + std::pair<map::iterator, bool> p = inits.insert( + map::value_type(page_id_t(space, page_no), init)); + ut_ad(!p.first->second.created); + if (!p.second && p.first->second.lsn < init.lsn) { + p.first->second = init; + } + } + + /** Get the last stored lsn of the page id and its respective + init/load operation. + @param[in] page_id page id + @param[in,out] init initialize log or load log + @return the latest page initialization; + not valid after releasing recv_sys->mutex. */ + init& last(page_id_t page_id) + { + ut_ad(mutex_own(&recv_sys->mutex)); + return inits.find(page_id)->second; + } + + /** At the end of each recovery batch, reset the 'created' flags. */ + void reset() + { + ut_ad(mutex_own(&recv_sys->mutex)); + ut_ad(recv_no_ibuf_operations); + for (map::iterator i= inits.begin(); i != inits.end(); i++) { + i->second.created = false; + } + } + + /** On the last recovery batch, merge buffered changes to those + pages that were initialized by buf_page_create() and still reside + in the buffer pool. Stale pages are not allowed in the buffer pool. + + Note: When MDEV-14481 implements redo log apply in the + background, we will have to ensure that buf_page_get_gen() + will not deliver stale pages to users (pages on which the + change buffer was not merged yet). Normally, the change + buffer merge is performed on I/O completion. Maybe, add a + flag to buf_page_t and perform the change buffer merge on + the first actual access? + @param[in,out] mtr dummy mini-transaction */ + void ibuf_merge(mtr_t& mtr) + { + ut_ad(mutex_own(&recv_sys->mutex)); + ut_ad(!recv_no_ibuf_operations); + mtr.start(); + + for (map::const_iterator i= inits.begin(); i != inits.end(); + i++) { + if (!i->second.created) { + continue; + } + if (buf_block_t* block = buf_page_get_gen( + i->first, univ_page_size, RW_X_LATCH, NULL, + BUF_GET_IF_IN_POOL, __FILE__, __LINE__, + &mtr, NULL)) { + mutex_exit(&recv_sys->mutex); + ibuf_merge_or_delete_for_page( + block, i->first, + &block->page.size, true); + mtr.commit(); + mtr.start(); + mutex_enter(&recv_sys->mutex); + } + } + + mtr.commit(); + } + + /** Clear the data structure */ + void clear() { inits.clear(); } +}; + +static mlog_init_t mlog_init; + /** Process a MLOG_CREATE2 record that indicates that a tablespace is being shrunk in size. @param[in] space_id tablespace identifier @@ -623,6 +748,7 @@ recv_sys_close() } recv_spaces.clear(); + mlog_init.clear(); } /************************************************************ @@ -1797,6 +1923,18 @@ recv_add_to_hash_table( recv_sys->n_addrs++; } + switch (type) { + case MLOG_INIT_FILE_PAGE2: + case MLOG_ZIP_PAGE_COMPRESS: + /* Ignore any earlier redo log records for this page. */ + ut_ad(recv_addr->state == RECV_NOT_PROCESSED + || recv_addr->state == RECV_WILL_NOT_READ); + recv_addr->state = RECV_WILL_NOT_READ; + mlog_init.add(space, page_no, start_lsn); + default: + break; + } + UT_LIST_ADD_LAST(recv_addr->rec_list, recv); prev_field = &(recv->data); @@ -1865,9 +2003,10 @@ recv_data_copy_to_buf( lsn of a log record. @param[in,out] block buffer pool page @param[in,out] mtr mini-transaction -@param[in,out] recv_addr recovery address */ +@param[in,out] recv_addr recovery address +@param[in] init_lsn the initial LSN where to start recovery */ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, - recv_addr_t* recv_addr) + recv_addr_t* recv_addr, lsn_t init_lsn = 0) { page_t* page; page_zip_des_t* page_zip; @@ -1921,6 +2060,15 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr, if (recv->start_lsn < page_lsn) { /* Ignore this record, because there are later changes for this page. */ + DBUG_LOG("ib_log", "apply skip " + << get_mlog_string(recv->type) + << " LSN " << recv->start_lsn << " < " + << page_lsn); + } else if (recv->start_lsn < init_lsn) { + DBUG_LOG("ib_log", "init skip " + << get_mlog_string(recv->type) + << " LSN " << recv->start_lsn << " < " + << init_lsn); } else if (srv_was_tablespace_truncated(space) && recv->start_lsn < truncate_t::get_truncated_tablespace_init_lsn( @@ -2168,6 +2316,7 @@ ignore: case RECV_DISCARDED: goto ignore; case RECV_NOT_PROCESSED: + case RECV_WILL_NOT_READ: break; } @@ -2181,19 +2330,95 @@ ignore: const page_id_t page_id(recv_addr->space, recv_addr->page_no); - mtr.start(); - mtr.set_log_mode(MTR_LOG_NONE); - if (buf_block_t* block = buf_page_get_gen( - page_id, univ_page_size, RW_X_LATCH, - NULL, BUF_GET_IF_IN_POOL, - __FILE__, __LINE__, &mtr, NULL)) { - buf_block_dbg_add_level( - block, SYNC_NO_ORDER_CHECK); - recv_recover_page(block, mtr, recv_addr); - ut_ad(mtr.has_committed()); + if (recv_addr->state == RECV_NOT_PROCESSED) { +apply: + mtr.start(); + mtr.set_log_mode(MTR_LOG_NONE); + if (buf_block_t* block = buf_page_get_gen( + page_id, univ_page_size, + RW_X_LATCH, NULL, + BUF_GET_IF_IN_POOL, + __FILE__, __LINE__, &mtr, NULL)) { + buf_block_dbg_add_level( + block, SYNC_NO_ORDER_CHECK); + recv_recover_page(block, mtr, + recv_addr); + ut_ad(mtr.has_committed()); + } else { + mtr.commit(); + recv_read_in_area(page_id); + } } else { - mtr.commit(); - recv_read_in_area(page_id); + mlog_init_t::init& i = mlog_init.last(page_id); + const lsn_t end_lsn = UT_LIST_GET_LAST( + recv_addr->rec_list)->end_lsn; + + if (end_lsn < i.lsn) { + DBUG_LOG("ib_log", "skip log for page " + << page_id + << " LSN " << end_lsn + << " < " << i.lsn); +skip: + recv_addr->state = RECV_PROCESSED; + goto ignore; + } + + fil_space_t* space = fil_space_acquire( + recv_addr->space); + if (!space) { + goto skip; + } + + if (space->enable_lsn) { +do_read: + space->release(); + recv_addr->state = RECV_NOT_PROCESSED; + goto apply; + } + + /* Determine if a tablespace could be + for an internal table for FULLTEXT INDEX. + For those tables, no MLOG_INDEX_LOAD record + used to be written when redo logging was + disabled. Hence, we cannot optimize + away page reads, because all the redo + log records for initializing and + modifying the page in the past could + be older than the page in the data + file. + + The check is too broad, causing all + tables whose names start with FTS_ to + skip the optimization. */ + + if (strstr(space->name, "/FTS_")) { + goto do_read; + } + + mtr.start(); + mtr.set_log_mode(MTR_LOG_NONE); + buf_block_t* block = buf_page_create( + page_id, page_size_t(space->flags), + &mtr); + if (recv_addr->state == RECV_PROCESSED) { + /* The page happened to exist + in the buffer pool, or it was + just being read in. Before + buf_page_get_with_no_latch() + returned, all changes must have + been applied to the page already. */ + mtr.commit(); + } else { + i.created = true; + buf_block_dbg_add_level( + block, SYNC_NO_ORDER_CHECK); + mtr.x_latch_at_savepoint(0, block); + recv_recover_page(block, mtr, + recv_addr, i.lsn); + ut_ad(mtr.has_committed()); + } + + space->release(); } } } @@ -2201,7 +2426,13 @@ ignore: /* Wait until all the pages have been processed */ while (recv_sys->n_addrs != 0) { - bool abort = recv_sys->found_corrupt_log; + const bool abort = recv_sys->found_corrupt_log + || recv_sys->found_corrupt_fs; + + if (recv_sys->found_corrupt_fs && !srv_force_recovery) { + ib::info() << "Set innodb_force_recovery=1" + " to ignore corrupted pages."; + } mutex_exit(&(recv_sys->mutex)); @@ -2240,6 +2471,10 @@ ignore: log_mutex_enter(); mutex_enter(&(recv_sys->mutex)); + mlog_init.reset(); + } else if (!recv_no_ibuf_operations) { + /* We skipped this in buf_page_create(). */ + mlog_init.ibuf_merge(mtr); } recv_sys->apply_log_recs = FALSE; @@ -2441,9 +2676,17 @@ recv_report_corrupt_log( } /** Report a MLOG_INDEX_LOAD operation. -@param[in] space_id tablespace identifier */ -ATTRIBUTE_COLD static void recv_mlog_index_load(ulint space_id) +@param[in] space_id tablespace id +@param[in] page_no page number +@param[in] lsn log sequence number */ +ATTRIBUTE_COLD static void +recv_mlog_index_load(ulint space_id, ulint page_no, lsn_t lsn) { + recv_spaces_t::iterator it = recv_spaces.find(space_id); + if (it != recv_spaces.end()) { + it->second.mlog_index_load(lsn); + } + if (log_optimized_ddl_op) { log_optimized_ddl_op(space_id); } @@ -2605,7 +2848,7 @@ loop: /* fall through */ case MLOG_INDEX_LOAD: if (type == MLOG_INDEX_LOAD) { - recv_mlog_index_load(space); + recv_mlog_index_load(space, page_no, old_lsn); } /* fall through */ case MLOG_FILE_NAME: @@ -2759,7 +3002,7 @@ corrupted_log: break; #endif /* UNIV_LOG_LSN_DEBUG */ case MLOG_INDEX_LOAD: - recv_mlog_index_load(space); + recv_mlog_index_load(space, page_no, old_lsn); break; case MLOG_FILE_NAME: case MLOG_FILE_DELETE: @@ -3299,6 +3542,7 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) /* The tablespace was found, and there are some redo log records for it. */ fil_names_dirty(i->second.space); + i->second.space->enable_lsn = i->second.enable_lsn; } else if (i->second.name == "") { ib::error() << "Missing MLOG_FILE_NAME" " or MLOG_FILE_DELETE" diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 1f4ffdf1e8c..c6d01392f31 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -4933,23 +4933,28 @@ wait_again: if (indexes[i]->type & DICT_FTS) { row_fts_psort_info_destroy(psort_info, merge_info); fts_psort_initiated = false; - } else if (error != DB_SUCCESS || !online) { - /* Do not apply any online log. */ + } else if (dict_index_is_spatial(indexes[i])) { + /* We never disable redo logging for + creating SPATIAL INDEX. Avoid writing any + unnecessary MLOG_INDEX_LOAD record. */ } else if (old_table != new_table) { ut_ad(!sort_idx->online_log); ut_ad(sort_idx->online_status == ONLINE_INDEX_COMPLETE); - } else { - if (dict_index_is_spatial(indexes[i])) { - /* We never disable redo logging for - creating SPATIAL INDEX. Avoid writing any - unnecessary MLOG_INDEX_LOAD record. */ - } else if (FlushObserver* flush_observer = - trx->get_flush_observer()) { - flush_observer->flush(); - row_merge_write_redo(indexes[i]); + } else if (FlushObserver* flush_observer = + trx->get_flush_observer()) { + if (error != DB_SUCCESS) { + flush_observer->interrupted(); } + flush_observer->flush(); + row_merge_write_redo(indexes[i]); + } + if (old_table != new_table + || (indexes[i]->type & (DICT_FTS | DICT_SPATIAL)) + || error != DB_SUCCESS || !online) { + /* Do not apply any online log. */ + } else { if (global_system_variables.log_warnings > 2) { sql_print_information( "InnoDB: Online DDL : Applying" @@ -5056,13 +5061,7 @@ func_exit: flush_observer->flush(); - trx->remove_flush_observer(); - - if (trx_is_interrupted(trx)) { - error = DB_INTERRUPTED; - } - - if (error == DB_SUCCESS && old_table != new_table) { + if (old_table != new_table) { for (const dict_index_t* index = dict_table_get_first_index(new_table); index != NULL; @@ -5073,6 +5072,12 @@ func_exit: } } } + + trx->remove_flush_observer(); + + if (trx_is_interrupted(trx)) { + error = DB_INTERRUPTED; + } } DBUG_RETURN(error); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 8164d1df133..a60170de48e 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1984,7 +1984,8 @@ files_checked: recv_apply_hashed_log_recs(true); - if (recv_sys->found_corrupt_log) { + if (recv_sys->found_corrupt_log + || recv_sys->found_corrupt_fs) { return(srv_init_abort(DB_CORRUPTION)); } |