summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2019-04-17 15:26:17 +0300
committerMarko Mäkelä <marko.makela@mariadb.com>2019-04-17 15:26:17 +0300
commit250799f961b8f261ae2922382e2c69bca7c3b6fa (patch)
tree2ac2b7b9864254188d9ac426fc878e511224477d
parenta2335b791aa06fbf4b784486ed9d34ba75af9d51 (diff)
parent169c00994b5c7d94576fea88b2e84a8833511e96 (diff)
downloadmariadb-git-250799f961b8f261ae2922382e2c69bca7c3b6fa.tar.gz
Merge 10.2 into 10.3
-rw-r--r--mysql-test/suite/encryption/r/corrupted_during_recovery.result22
-rw-r--r--mysql-test/suite/encryption/t/corrupted_during_recovery.test60
-rw-r--r--mysql-test/suite/innodb/r/corrupted_during_recovery.result22
-rw-r--r--mysql-test/suite/innodb/r/log_alter_table.result8
-rw-r--r--mysql-test/suite/innodb/t/corrupted_during_recovery.opt1
-rw-r--r--mysql-test/suite/innodb/t/corrupted_during_recovery.test60
-rw-r--r--mysql-test/suite/innodb/t/log_alter_table.test8
-rw-r--r--storage/innobase/buf/buf0buf.cc15
-rw-r--r--storage/innobase/handler/ha_innodb.cc2
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc5
-rw-r--r--storage/innobase/include/buf0buf.h9
-rw-r--r--storage/innobase/include/fil0fil.h3
-rw-r--r--storage/innobase/include/fsp0types.h9
-rw-r--r--storage/innobase/log/log0recv.cc284
-rw-r--r--storage/innobase/row/row0merge.cc41
-rw-r--r--storage/innobase/srv/srv0start.cc3
16 files changed, 495 insertions, 57 deletions
diff --git a/mysql-test/suite/encryption/r/corrupted_during_recovery.result b/mysql-test/suite/encryption/r/corrupted_during_recovery.result
new file mode 100644
index 00000000000..41c0d7d75a8
--- /dev/null
+++ b/mysql-test/suite/encryption/r/corrupted_during_recovery.result
@@ -0,0 +1,22 @@
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(1);
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(2);
+# Kill the server
+# Corrupt the pages
+SELECT * FROM t1;
+ERROR 42000: Unknown storage engine 'InnoDB'
+SELECT * FROM t1;
+a
+1
+2
+SELECT * FROM t2;
+a
+2
+CHECK TABLE t1,t2;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/encryption/t/corrupted_during_recovery.test b/mysql-test/suite/encryption/t/corrupted_during_recovery.test
new file mode 100644
index 00000000000..5784d5775c6
--- /dev/null
+++ b/mysql-test/suite/encryption/t/corrupted_during_recovery.test
@@ -0,0 +1,60 @@
+--source include/have_innodb.inc
+--source include/have_file_key_management_plugin.inc
+
+--disable_query_log
+call mtr.add_suppression("InnoDB: Plugin initialization aborted");
+call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
+call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page");
+call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Table is encrypted but decrypt failed");
+call mtr.add_suppression("InnoDB: The page \\[page id: space=\\d+, page number=3\\] in file '.*test.t1\\.ibd' cannot be decrypted");
+call mtr.add_suppression("InnoDB: Table in tablespace \\d+ encrypted. However key management plugin or used key_version \\d+ is not found or used encryption algorithm or method does not match. Can't continue opening the table.");
+--enable_query_log
+
+let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(1);
+# Force a redo log checkpoint.
+--source include/restart_mysqld.inc
+--source ../../suite/innodb/include/no_checkpoint_start.inc
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB, ENCRYPTED=YES;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(2);
+
+--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2;
+--source ../../suite/innodb/include/no_checkpoint_end.inc
+
+--echo # Corrupt the pages
+
+perl;
+my $ps = $ENV{INNODB_PAGE_SIZE};
+
+my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+seek (FILE, $ENV{INNODB_PAGE_SIZE} * 3, SEEK_SET) or die "seek";
+print FILE "junk";
+close FILE or die "close";
+
+$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us!
+# Unfortunately, we are not immune to page 0 corruption.
+seek (FILE, $ps, SEEK_SET) or die "seek";
+print FILE chr(0xff) x ($ps * 3);
+close FILE or die "close";
+EOF
+
+--source include/start_mysqld.inc
+--error ER_UNKNOWN_STORAGE_ENGINE
+SELECT * FROM t1;
+let $restart_parameters=--innodb_force_recovery=1;
+--source include/restart_mysqld.inc
+
+SELECT * FROM t1;
+SELECT * FROM t2;
+CHECK TABLE t1,t2;
+
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/innodb/r/corrupted_during_recovery.result b/mysql-test/suite/innodb/r/corrupted_during_recovery.result
new file mode 100644
index 00000000000..788f17e3284
--- /dev/null
+++ b/mysql-test/suite/innodb/r/corrupted_during_recovery.result
@@ -0,0 +1,22 @@
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(1);
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(1);
+# Kill the server
+# Corrupt the pages
+SELECT * FROM t1;
+ERROR 42000: Unknown storage engine 'InnoDB'
+SELECT * FROM t1;
+a
+0
+2
+SELECT * FROM t2;
+a
+1
+CHECK TABLE t1,t2;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+test.t2 check status OK
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/innodb/r/log_alter_table.result b/mysql-test/suite/innodb/r/log_alter_table.result
index a6f35543c04..9cb9ed77e33 100644
--- a/mysql-test/suite/innodb/r/log_alter_table.result
+++ b/mysql-test/suite/innodb/r/log_alter_table.result
@@ -7,11 +7,11 @@
#
CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1,2);
-ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE;
-ALTER TABLE t1 DROP INDEX b, ADD INDEX (b);
+ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE;
+ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED;
# Kill the server
-FOUND 1 /scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0/ in mysqld.1.err
-FOUND 1 /scan .*: log rec MLOG_INDEX_LOAD/ in mysqld.1.err
+FOUND 2 /scan \d+: multi-log rec MLOG_FILE_CREATE2 len \d+ page \d+:0/ in mysqld.1.err
+FOUND 3 /scan \d+: log rec MLOG_INDEX_LOAD/ in mysqld.1.err
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.opt b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt
new file mode 100644
index 00000000000..6051f4cd1fa
--- /dev/null
+++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.opt
@@ -0,0 +1 @@
+--innodb_doublewrite=0
diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.test b/mysql-test/suite/innodb/t/corrupted_during_recovery.test
new file mode 100644
index 00000000000..fbfb1bbe5d5
--- /dev/null
+++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.test
@@ -0,0 +1,60 @@
+--source include/have_innodb.inc
+
+--disable_query_log
+call mtr.add_suppression("InnoDB: Plugin initialization aborted");
+call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
+call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace test/t1 page");
+call mtr.add_suppression("InnoDB: Failed to read file '.*test.t1\\.ibd' at offset 3: Page read from tablespace is corrupted.");
+--enable_query_log
+
+let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
+CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(1);
+# Force a redo log checkpoint.
+--source include/restart_mysqld.inc
+--source ../include/no_checkpoint_start.inc
+CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES(2);
+SET GLOBAL innodb_flush_log_at_trx_commit=1;
+INSERT INTO t2 VALUES(1);
+
+--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1,t2;
+--source ../include/no_checkpoint_end.inc
+
+--echo # Corrupt the pages
+
+perl;
+my $ps = $ENV{INNODB_PAGE_SIZE};
+
+my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n";
+die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps;
+# Replace the a=1 with a=0.
+$page =~ s/\x80\x0\x0\x0\x0\x0\x0\x1/\x80\x0\x0\x0\x0\x0\x0\x0/;
+sysseek(FILE, 3*$ps, 0) || die "Unable to seek $file\n";
+syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
+close FILE or die "close";
+
+$file = "$ENV{MYSQLD_DATADIR}/test/t2.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+# Corrupt pages 1 to 3. MLOG_INIT_FILE_PAGE2 should protect us!
+# Unfortunately, we are not immune to page 0 corruption.
+seek (FILE, $ps, SEEK_SET) or die "seek";
+print FILE chr(0xff) x ($ps * 3);
+close FILE or die "close";
+EOF
+
+--source include/start_mysqld.inc
+--error ER_UNKNOWN_STORAGE_ENGINE
+SELECT * FROM t1;
+let $restart_parameters=--innodb_force_recovery=1;
+--source include/restart_mysqld.inc
+SELECT * FROM t1;
+SELECT * FROM t2;
+CHECK TABLE t1,t2;
+
+DROP TABLE t1, t2;
diff --git a/mysql-test/suite/innodb/t/log_alter_table.test b/mysql-test/suite/innodb/t/log_alter_table.test
index 6f12dfaf0b9..c92953f16a1 100644
--- a/mysql-test/suite/innodb/t/log_alter_table.test
+++ b/mysql-test/suite/innodb/t/log_alter_table.test
@@ -19,9 +19,9 @@ CREATE TABLE t1 (a INT NOT NULL, b INT UNIQUE) ENGINE=InnoDB;
# MLOG_INDEX_LOAD will not be emitted for empty tables. Insert a row.
INSERT INTO t1 VALUES (1,2);
# We should get two MLOG_INDEX_LOAD for this.
-ALTER TABLE t1 ADD PRIMARY KEY(a), ALGORITHM=INPLACE;
+ALTER TABLE t1 ADD PRIMARY KEY(a), LOCK=SHARED, ALGORITHM=INPLACE;
# And one MLOG_INDEX_LOAD for this.
-ALTER TABLE t1 DROP INDEX b, ADD INDEX (b);
+ALTER TABLE t1 DROP INDEX b, ADD INDEX (b), LOCK=SHARED;
--let CLEANUP_IF_CHECKPOINT=DROP TABLE t1;
--source include/no_checkpoint_end.inc
@@ -32,10 +32,10 @@ ALTER TABLE t1 DROP INDEX b, ADD INDEX (b);
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
let SEARCH_ABORT=NOT FOUND;
# ensure that we have exactly 2 records there.
-let SEARCH_PATTERN=scan .*: multi-log rec MLOG_FILE_CREATE2.*page .*:0;
+let SEARCH_PATTERN=scan \d+: multi-log rec MLOG_FILE_CREATE2 len \d+ page \d+:0;
--source include/search_pattern_in_file.inc
# ensure that we have exactly 3 records there.
-let SEARCH_PATTERN=scan .*: log rec MLOG_INDEX_LOAD;
+let SEARCH_PATTERN=scan \d+: log rec MLOG_INDEX_LOAD;
--source include/search_pattern_in_file.inc
CHECK TABLE t1;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index b709fe643fa..d5d1dcea152 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -5612,7 +5612,15 @@ buf_page_create(
buf_block_free(free_block);
- return(buf_page_get_with_no_latch(page_id, page_size, mtr));
+ if (!recv_recovery_is_on()) {
+ return buf_page_get_with_no_latch(page_id, page_size,
+ mtr);
+ }
+
+ mutex_exit(&recv_sys->mutex);
+ block = buf_page_get_with_no_latch(page_id, page_size, mtr);
+ mutex_enter(&recv_sys->mutex);
+ return block;
}
/* If we get here, the page was not in buf_pool: init it there */
@@ -5678,7 +5686,9 @@ buf_page_create(
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
- ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE);
+ if (!recv_recovery_is_on()) {
+ ibuf_merge_or_delete_for_page(NULL, page_id, &page_size, TRUE);
+ }
frame = block->frame;
@@ -5693,6 +5703,7 @@ buf_page_create(
(3) key_version on encrypted pages (not page 0:0) */
memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
+ memset(frame + FIL_PAGE_LSN, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 8db3b72cee6..11191142882 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -4033,7 +4033,7 @@ static int innodb_init_params()
if (innobase_open_files > open_files_limit) {
ib::warn() << "innodb_open_files " << innobase_open_files
<< " should not be greater"
- << "than the open_files_limit " << open_files_limit;
+ << " than the open_files_limit " << open_files_limit;
if (innobase_open_files > tc_size) {
innobase_open_files = tc_size;
}
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index e3c61ca29cc..e8d089ee41b 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2018, MariaDB Corporation.
+Copyright (c) 2016, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -4433,7 +4433,8 @@ ibuf_merge_or_delete_for_page(
ulint dops[IBUF_OP_COUNT];
ut_ad(block == NULL || page_id == block->page.id);
- ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ);
+ ut_ad(block == NULL || buf_block_get_io_fix(block) == BUF_IO_READ
+ || recv_recovery_is_on());
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
|| trx_sys_hdr_page(page_id)
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 401d05b6e5a..5c6360823bf 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -235,6 +235,15 @@ public:
}
bool operator!=(const page_id_t& rhs) const { return !(*this == rhs); }
+ bool operator<(const page_id_t& rhs) const
+ {
+ if (m_space == rhs.m_space) {
+ return m_page_no < rhs.m_page_no;
+ }
+
+ return m_space < rhs.m_space;
+ }
+
/** Retrieve the tablespace id.
@return tablespace id */
uint32_t space() const { return m_space; }
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 4ae303e9321..14eacc54b24 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -84,6 +84,9 @@ struct fil_space_t {
Protected by log_sys.mutex.
If and only if this is nonzero, the
tablespace will be in named_spaces. */
+ /** Log sequence number of the latest MLOG_INDEX_LOAD record
+ that was found while parsing the redo log */
+ lsn_t enable_lsn;
bool stop_new_ops;
/*!< we set this true when we start
deleting a single-table tablespace.
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index 642bd20e67e..0f965261ca8 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2018, MariaDB Corporation.
+Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -255,8 +255,7 @@ or have been introduced in MySQL 5.7 or 8.0:
=====================================================================
The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS:
=====================================================================
- 25: DATA_DIR
- 26..27: ATOMIC_WRITES
+ 27: DATA_DIR
28..31: COMPRESSION_LEVEL
*/
@@ -264,9 +263,9 @@ The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS:
#define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR)
/** Zero relative shift position of the DATA_DIR flag */
-#define FSP_FLAGS_MEM_DATA_DIR 25
+#define FSP_FLAGS_MEM_DATA_DIR 27
/** Zero relative shift position of the COMPRESSION_LEVEL field */
-#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 26
+#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28
/** Zero relative shift position of the POST_ANTELOPE field */
#define FSP_FLAGS_POS_POST_ANTELOPE 0
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 9930fdeae03..f941c522dcc 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -155,10 +155,22 @@ struct file_name_t {
/** FSP_SIZE of tablespace */
ulint size;
+ /** the log sequence number of the last observed MLOG_INDEX_LOAD
+ record for the tablespace */
+ lsn_t enable_lsn;
+
/** Constructor */
file_name_t(std::string name_, bool deleted) :
name(name_), space(NULL), status(deleted ? DELETED: NORMAL),
- size(0) {}
+ size(0), enable_lsn(0) {}
+
+ /** Report a MLOG_INDEX_LOAD operation, meaning that
+ mlog_init for any earlier LSN must be skipped.
+ @param lsn log sequence number of the MLOG_INDEX_LOAD */
+ void mlog_index_load(lsn_t lsn)
+ {
+ if (enable_lsn < lsn) enable_lsn = lsn;
+ }
};
/** Map of dirty tablespaces during recovery */
@@ -174,6 +186,8 @@ static recv_spaces_t recv_spaces;
enum recv_addr_state {
/** not yet processed */
RECV_NOT_PROCESSED,
+ /** not processed; the page will be reinitialized */
+ RECV_WILL_NOT_READ,
/** page is being read */
RECV_BEING_READ,
/** log records are being applied on the page */
@@ -220,6 +234,117 @@ void (*log_file_op)(ulint space_id, const byte* flags,
const byte* name, ulint len,
const byte* new_name, ulint new_len);
+/** Information about initializing page contents during redo log processing */
+class mlog_init_t
+{
+public:
+ /** A page initialization operation that was parsed from
+ the redo log */
+ struct init {
+ /** log sequence number of the page initialization */
+ lsn_t lsn;
+ /** Whether btr_page_create() avoided a read of the page.
+
+ At the end of the last recovery batch, ibuf_merge()
+ will invoke change buffer merge for pages that reside
+ in the buffer pool. (In the last batch, loading pages
+ would trigger change buffer merge.) */
+ bool created;
+ };
+
+private:
+ typedef std::map<const page_id_t, init,
+ std::less<const page_id_t>,
+ ut_allocator<std::pair<const page_id_t, init> > >
+ map;
+ /** Map of page initialization operations.
+ FIXME: Merge this to recv_sys->addr_hash! */
+ map inits;
+public:
+ /** Record that a page will be initialized by the redo log.
+ @param[in] space tablespace identifier
+ @param[in] page_no page number
+ @param[in] lsn log sequence number */
+ void add(ulint space, ulint page_no, lsn_t lsn)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ const init init = { lsn, false };
+ std::pair<map::iterator, bool> p = inits.insert(
+ map::value_type(page_id_t(space, page_no), init));
+ ut_ad(!p.first->second.created);
+ if (!p.second && p.first->second.lsn < init.lsn) {
+ p.first->second = init;
+ }
+ }
+
+ /** Get the last stored lsn of the page id and its respective
+ init/load operation.
+ @param[in] page_id page id
+ @param[in,out] init initialize log or load log
+ @return the latest page initialization;
+ not valid after releasing recv_sys->mutex. */
+ init& last(page_id_t page_id)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ return inits.find(page_id)->second;
+ }
+
+ /** At the end of each recovery batch, reset the 'created' flags. */
+ void reset()
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ ut_ad(recv_no_ibuf_operations);
+ for (map::iterator i= inits.begin(); i != inits.end(); i++) {
+ i->second.created = false;
+ }
+ }
+
+ /** On the last recovery batch, merge buffered changes to those
+ pages that were initialized by buf_page_create() and still reside
+ in the buffer pool. Stale pages are not allowed in the buffer pool.
+
+ Note: When MDEV-14481 implements redo log apply in the
+ background, we will have to ensure that buf_page_get_gen()
+ will not deliver stale pages to users (pages on which the
+ change buffer was not merged yet). Normally, the change
+ buffer merge is performed on I/O completion. Maybe, add a
+ flag to buf_page_t and perform the change buffer merge on
+ the first actual access?
+ @param[in,out] mtr dummy mini-transaction */
+ void ibuf_merge(mtr_t& mtr)
+ {
+ ut_ad(mutex_own(&recv_sys->mutex));
+ ut_ad(!recv_no_ibuf_operations);
+ mtr.start();
+
+ for (map::const_iterator i= inits.begin(); i != inits.end();
+ i++) {
+ if (!i->second.created) {
+ continue;
+ }
+ if (buf_block_t* block = buf_page_get_gen(
+ i->first, univ_page_size, RW_X_LATCH, NULL,
+ BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
+ &mtr, NULL)) {
+ mutex_exit(&recv_sys->mutex);
+ ibuf_merge_or_delete_for_page(
+ block, i->first,
+ &block->page.size, true);
+ mtr.commit();
+ mtr.start();
+ mutex_enter(&recv_sys->mutex);
+ }
+ }
+
+ mtr.commit();
+ }
+
+ /** Clear the data structure */
+ void clear() { inits.clear(); }
+};
+
+static mlog_init_t mlog_init;
+
/** Process a MLOG_CREATE2 record that indicates that a tablespace
is being shrunk in size.
@param[in] space_id tablespace identifier
@@ -623,6 +748,7 @@ recv_sys_close()
}
recv_spaces.clear();
+ mlog_init.clear();
}
/************************************************************
@@ -1797,6 +1923,18 @@ recv_add_to_hash_table(
recv_sys->n_addrs++;
}
+ switch (type) {
+ case MLOG_INIT_FILE_PAGE2:
+ case MLOG_ZIP_PAGE_COMPRESS:
+ /* Ignore any earlier redo log records for this page. */
+ ut_ad(recv_addr->state == RECV_NOT_PROCESSED
+ || recv_addr->state == RECV_WILL_NOT_READ);
+ recv_addr->state = RECV_WILL_NOT_READ;
+ mlog_init.add(space, page_no, start_lsn);
+ default:
+ break;
+ }
+
UT_LIST_ADD_LAST(recv_addr->rec_list, recv);
prev_field = &(recv->data);
@@ -1865,9 +2003,10 @@ recv_data_copy_to_buf(
lsn of a log record.
@param[in,out] block buffer pool page
@param[in,out] mtr mini-transaction
-@param[in,out] recv_addr recovery address */
+@param[in,out] recv_addr recovery address
+@param[in] init_lsn the initial LSN where to start recovery */
static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
- recv_addr_t* recv_addr)
+ recv_addr_t* recv_addr, lsn_t init_lsn = 0)
{
page_t* page;
page_zip_des_t* page_zip;
@@ -1921,6 +2060,15 @@ static void recv_recover_page(buf_block_t* block, mtr_t& mtr,
if (recv->start_lsn < page_lsn) {
/* Ignore this record, because there are later changes
for this page. */
+ DBUG_LOG("ib_log", "apply skip "
+ << get_mlog_string(recv->type)
+ << " LSN " << recv->start_lsn << " < "
+ << page_lsn);
+ } else if (recv->start_lsn < init_lsn) {
+ DBUG_LOG("ib_log", "init skip "
+ << get_mlog_string(recv->type)
+ << " LSN " << recv->start_lsn << " < "
+ << init_lsn);
} else if (srv_was_tablespace_truncated(space)
&& recv->start_lsn
< truncate_t::get_truncated_tablespace_init_lsn(
@@ -2168,6 +2316,7 @@ ignore:
case RECV_DISCARDED:
goto ignore;
case RECV_NOT_PROCESSED:
+ case RECV_WILL_NOT_READ:
break;
}
@@ -2181,19 +2330,95 @@ ignore:
const page_id_t page_id(recv_addr->space,
recv_addr->page_no);
- mtr.start();
- mtr.set_log_mode(MTR_LOG_NONE);
- if (buf_block_t* block = buf_page_get_gen(
- page_id, univ_page_size, RW_X_LATCH,
- NULL, BUF_GET_IF_IN_POOL,
- __FILE__, __LINE__, &mtr, NULL)) {
- buf_block_dbg_add_level(
- block, SYNC_NO_ORDER_CHECK);
- recv_recover_page(block, mtr, recv_addr);
- ut_ad(mtr.has_committed());
+ if (recv_addr->state == RECV_NOT_PROCESSED) {
+apply:
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+ if (buf_block_t* block = buf_page_get_gen(
+ page_id, univ_page_size,
+ RW_X_LATCH, NULL,
+ BUF_GET_IF_IN_POOL,
+ __FILE__, __LINE__, &mtr, NULL)) {
+ buf_block_dbg_add_level(
+ block, SYNC_NO_ORDER_CHECK);
+ recv_recover_page(block, mtr,
+ recv_addr);
+ ut_ad(mtr.has_committed());
+ } else {
+ mtr.commit();
+ recv_read_in_area(page_id);
+ }
} else {
- mtr.commit();
- recv_read_in_area(page_id);
+ mlog_init_t::init& i = mlog_init.last(page_id);
+ const lsn_t end_lsn = UT_LIST_GET_LAST(
+ recv_addr->rec_list)->end_lsn;
+
+ if (end_lsn < i.lsn) {
+ DBUG_LOG("ib_log", "skip log for page "
+ << page_id
+ << " LSN " << end_lsn
+ << " < " << i.lsn);
+skip:
+ recv_addr->state = RECV_PROCESSED;
+ goto ignore;
+ }
+
+ fil_space_t* space = fil_space_acquire(
+ recv_addr->space);
+ if (!space) {
+ goto skip;
+ }
+
+ if (space->enable_lsn) {
+do_read:
+ space->release();
+ recv_addr->state = RECV_NOT_PROCESSED;
+ goto apply;
+ }
+
+ /* Determine if a tablespace could be
+ for an internal table for FULLTEXT INDEX.
+ For those tables, no MLOG_INDEX_LOAD record
+ used to be written when redo logging was
+ disabled. Hence, we cannot optimize
+ away page reads, because all the redo
+ log records for initializing and
+ modifying the page in the past could
+ be older than the page in the data
+ file.
+
+ The check is too broad, causing all
+ tables whose names start with FTS_ to
+ skip the optimization. */
+
+ if (strstr(space->name, "/FTS_")) {
+ goto do_read;
+ }
+
+ mtr.start();
+ mtr.set_log_mode(MTR_LOG_NONE);
+ buf_block_t* block = buf_page_create(
+ page_id, page_size_t(space->flags),
+ &mtr);
+ if (recv_addr->state == RECV_PROCESSED) {
+ /* The page happened to exist
+ in the buffer pool, or it was
+ just being read in. Before
+ buf_page_get_with_no_latch()
+ returned, all changes must have
+ been applied to the page already. */
+ mtr.commit();
+ } else {
+ i.created = true;
+ buf_block_dbg_add_level(
+ block, SYNC_NO_ORDER_CHECK);
+ mtr.x_latch_at_savepoint(0, block);
+ recv_recover_page(block, mtr,
+ recv_addr, i.lsn);
+ ut_ad(mtr.has_committed());
+ }
+
+ space->release();
}
}
}
@@ -2201,7 +2426,13 @@ ignore:
/* Wait until all the pages have been processed */
while (recv_sys->n_addrs != 0) {
- bool abort = recv_sys->found_corrupt_log;
+ const bool abort = recv_sys->found_corrupt_log
+ || recv_sys->found_corrupt_fs;
+
+ if (recv_sys->found_corrupt_fs && !srv_force_recovery) {
+ ib::info() << "Set innodb_force_recovery=1"
+ " to ignore corrupted pages.";
+ }
mutex_exit(&(recv_sys->mutex));
@@ -2240,6 +2471,10 @@ ignore:
log_mutex_enter();
mutex_enter(&(recv_sys->mutex));
+ mlog_init.reset();
+ } else if (!recv_no_ibuf_operations) {
+ /* We skipped this in buf_page_create(). */
+ mlog_init.ibuf_merge(mtr);
}
recv_sys->apply_log_recs = FALSE;
@@ -2441,9 +2676,17 @@ recv_report_corrupt_log(
}
/** Report a MLOG_INDEX_LOAD operation.
-@param[in] space_id tablespace identifier */
-ATTRIBUTE_COLD static void recv_mlog_index_load(ulint space_id)
+@param[in] space_id tablespace id
+@param[in] page_no page number
+@param[in] lsn log sequence number */
+ATTRIBUTE_COLD static void
+recv_mlog_index_load(ulint space_id, ulint page_no, lsn_t lsn)
{
+ recv_spaces_t::iterator it = recv_spaces.find(space_id);
+ if (it != recv_spaces.end()) {
+ it->second.mlog_index_load(lsn);
+ }
+
if (log_optimized_ddl_op) {
log_optimized_ddl_op(space_id);
}
@@ -2605,7 +2848,7 @@ loop:
/* fall through */
case MLOG_INDEX_LOAD:
if (type == MLOG_INDEX_LOAD) {
- recv_mlog_index_load(space);
+ recv_mlog_index_load(space, page_no, old_lsn);
}
/* fall through */
case MLOG_FILE_NAME:
@@ -2759,7 +3002,7 @@ corrupted_log:
break;
#endif /* UNIV_LOG_LSN_DEBUG */
case MLOG_INDEX_LOAD:
- recv_mlog_index_load(space);
+ recv_mlog_index_load(space, page_no, old_lsn);
break;
case MLOG_FILE_NAME:
case MLOG_FILE_DELETE:
@@ -3299,6 +3542,7 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace)
/* The tablespace was found, and there
are some redo log records for it. */
fil_names_dirty(i->second.space);
+ i->second.space->enable_lsn = i->second.enable_lsn;
} else if (i->second.name == "") {
ib::error() << "Missing MLOG_FILE_NAME"
" or MLOG_FILE_DELETE"
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 1f4ffdf1e8c..c6d01392f31 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -4933,23 +4933,28 @@ wait_again:
if (indexes[i]->type & DICT_FTS) {
row_fts_psort_info_destroy(psort_info, merge_info);
fts_psort_initiated = false;
- } else if (error != DB_SUCCESS || !online) {
- /* Do not apply any online log. */
+ } else if (dict_index_is_spatial(indexes[i])) {
+ /* We never disable redo logging for
+ creating SPATIAL INDEX. Avoid writing any
+ unnecessary MLOG_INDEX_LOAD record. */
} else if (old_table != new_table) {
ut_ad(!sort_idx->online_log);
ut_ad(sort_idx->online_status
== ONLINE_INDEX_COMPLETE);
- } else {
- if (dict_index_is_spatial(indexes[i])) {
- /* We never disable redo logging for
- creating SPATIAL INDEX. Avoid writing any
- unnecessary MLOG_INDEX_LOAD record. */
- } else if (FlushObserver* flush_observer =
- trx->get_flush_observer()) {
- flush_observer->flush();
- row_merge_write_redo(indexes[i]);
+ } else if (FlushObserver* flush_observer =
+ trx->get_flush_observer()) {
+ if (error != DB_SUCCESS) {
+ flush_observer->interrupted();
}
+ flush_observer->flush();
+ row_merge_write_redo(indexes[i]);
+ }
+ if (old_table != new_table
+ || (indexes[i]->type & (DICT_FTS | DICT_SPATIAL))
+ || error != DB_SUCCESS || !online) {
+ /* Do not apply any online log. */
+ } else {
if (global_system_variables.log_warnings > 2) {
sql_print_information(
"InnoDB: Online DDL : Applying"
@@ -5056,13 +5061,7 @@ func_exit:
flush_observer->flush();
- trx->remove_flush_observer();
-
- if (trx_is_interrupted(trx)) {
- error = DB_INTERRUPTED;
- }
-
- if (error == DB_SUCCESS && old_table != new_table) {
+ if (old_table != new_table) {
for (const dict_index_t* index
= dict_table_get_first_index(new_table);
index != NULL;
@@ -5073,6 +5072,12 @@ func_exit:
}
}
}
+
+ trx->remove_flush_observer();
+
+ if (trx_is_interrupted(trx)) {
+ error = DB_INTERRUPTED;
+ }
}
DBUG_RETURN(error);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 8164d1df133..a60170de48e 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -1984,7 +1984,8 @@ files_checked:
recv_apply_hashed_log_recs(true);
- if (recv_sys->found_corrupt_log) {
+ if (recv_sys->found_corrupt_log
+ || recv_sys->found_corrupt_fs) {
return(srv_init_abort(DB_CORRUPTION));
}