summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- mysql-test/suite/innodb/include/innodb_doublewrite.inc 257
-rw-r--r-- mysql-test/suite/innodb/r/doublewrite.result 2
-rw-r--r-- mysql-test/suite/innodb/r/innodb_doublewrite.result 111
-rw-r--r-- mysql-test/suite/innodb/r/innodb_doublewrite_odirect.result 111
-rw-r--r-- mysql-test/suite/innodb/t/innodb_doublewrite-master.opt 7
-rw-r--r-- mysql-test/suite/innodb/t/innodb_doublewrite.test 1
-rw-r--r-- mysql-test/suite/innodb/t/innodb_doublewrite_odirect-master.opt 6
-rw-r--r-- mysql-test/suite/innodb/t/innodb_doublewrite_odirect.test 1
-rw-r--r-- mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result 33
-rw-r--r-- mysql-test/suite/sys_vars/r/sysvars_innodb.result 20
-rw-r--r-- mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test 31
-rw-r--r-- storage/innobase/buf/buf0buf.cc 12
-rw-r--r-- storage/innobase/buf/buf0dblwr.cc 254
-rw-r--r-- storage/innobase/fsp/fsp0sysspace.cc 2
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 46
-rw-r--r-- storage/innobase/handler/i_s.cc 1
-rw-r--r-- storage/innobase/include/buf0dblwr.h 11
-rw-r--r-- storage/innobase/include/fil0fil.h 4
-rw-r--r-- storage/innobase/include/fil0fil.ic 3
-rw-r--r-- storage/innobase/include/log0recv.h 21
-rw-r--r-- storage/innobase/include/srv0srv.h 3
-rw-r--r-- storage/innobase/log/log0recv.cc 10
-rw-r--r-- storage/innobase/srv/srv0srv.cc 4
23 files changed, 867 insertions, 84 deletions
diff --git a/mysql-test/suite/innodb/include/innodb_doublewrite.inc b/mysql-test/suite/innodb/include/innodb_doublewrite.inc
new file mode 100644
index 00000000000..d537280d178
--- /dev/null
+++ b/mysql-test/suite/innodb/include/innodb_doublewrite.inc
@@ -0,0 +1,257 @@
+# Embedded server does not support crashing
+--source include/not_embedded.inc
+--source include/have_innodb.inc
+--source include/innodb_page_size.inc
+--source include/have_debug.inc
+
+call mtr.add_suppression("InnoDB: Failed to set O_DIRECT on file.*");
+call mtr.add_suppression("InnoDB: Cannot recover page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] from the doublewrite buffer because it was written in reduced-doublewrite mode");
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace .*");
+call mtr.add_suppression("InnoDB: Failed to read file .* at offset .*: Page read from tablespace is corrupted.");
+call mtr.add_suppression("InnoDB: Table .* is corrupted. Please drop the table and recreate.");
+
+# Slow shutdown and restart to make sure ibuf merge is finished
+SET GLOBAL innodb_fast_shutdown = 0;
+
+let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
+let MYSQLD_DATADIR=`select @@datadir`;
+
+SET GLOBAL innodb_doublewrite=2;
+
+show variables like 'innodb_doublewrite';
+show variables like 'innodb_fil_make_page_dirty_debug';
+show variables like 'innodb_saved_page_number_debug';
+
+CREATE TABLE t1(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb;
+start transaction;
+INSERT INTO t1(b) VALUES(repeat('#',200));
+INSERT INTO t1(b) VALUES(repeat('+',200));
+INSERT INTO t1(b) VALUES(repeat('/',200));
+INSERT INTO t1(b) VALUES(repeat('|',200));
+INSERT INTO t1(b) VALUES(repeat('\\',200));
+INSERT INTO t1(b) VALUES(repeat('-',200));
+INSERT INTO t1(b) VALUES(repeat('&',200));
+INSERT INTO t1(b) VALUES(repeat('%',200));
+INSERT INTO t1(b) VALUES(repeat('@',200));
+INSERT INTO t1(b) VALUES(repeat('?',200));
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+commit work;
+
+select space from information_schema.innodb_sys_tables where name = 'test/t1' into @space_id;
+let SPACE_ID=`select space from information_schema.innodb_sys_tables where name = 'test/t1'`;
+
+--echo # Ensure that dirty pages of table t1 is flushed.
+flush tables t1 for export;
+unlock tables;
+
+begin;
+insert into t1(b) values (repeat('_', 42));
+
+--source ../include/no_checkpoint_start.inc
+
+--echo # Make the first page dirty for table t1
+set global innodb_saved_page_number_debug = 0;
+set global innodb_fil_make_page_dirty_debug = @space_id;
+
+--echo # Ensure that dirty pages of table t1 are flushed.
+set global innodb_buf_flush_list_now = 1;
+
+--let CLEANUP_IF_CHECKPOINT=drop table t1;
+--source ../include/no_checkpoint_end.inc
+
+--echo # Backup table and system tablespace before corrupting
+--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t1.ibd.backup
+--copy_file $MYSQLD_DATADIR/ibdata1 $MYSQLD_DATADIR/ibdata1.backup
+
+#
+# Corrupt page 5 of table t1 and store space_id + page_no = 5 in the
+# first doublewrite buffer page
+#
+perl;
+use IO::Handle;
+use Data::HexDump;
+my $fname= "$ENV{'MYSQLD_DATADIR'}test/t1.ibd";
+my $page_size = $ENV{INNODB_PAGE_SIZE};
+my $tspace_id = $ENV{SPACE_ID};
+my $page;
+
+open(FILE, "+<", $fname) or die "Unable to open $fname\n";;
+binmode FILE;
+sysseek(FILE, 5 * $page_size + 48, 0)||die "Unable to seek $fname\n";
+print FILE pack("H*", "deadbeefdeadbeefdeadbeefdeadbeef");
+close FILE or die "Unable to close $fname\n";
+
+open(FILE, "+<", "$ENV{MYSQLD_DATADIR}ibdata1")||die "cannot open ibdata1\n";
+sysseek(FILE, 6 * $page_size - 190, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, 12) == 12||die "Unable to read TRX_SYS\n";
+my($magic,$d1,$d2)=unpack "NNN", $_;
+die "magic=$magic, $d1, $d2\n" unless $magic == 536853855 && $d2 >= $d1 + 64;
+my($offset)=$d1*$page_size;
+# Find and read the page type from first page in the doublewrite buffer
+sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, 4096)== 4096||die "Cannot read doublewrite\n";
+sysseek(FILE, $offset + 24, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, 2) == 2||die "Unable to read FIL_PAGE_TYPE\n";
+my($pagetype)=unpack "n", $_;
+die "Not reduced doublewrite page page_type=$pagetype\n" unless $pagetype == 32124;
+# Find and read the space_id + page_no from first doublewrite page
+sysseek(FILE, $offset + 38, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, 10) == 10||die "Unable to read doublewrite buf\n";
+my($first_free,$space_id,$page_no)=unpack ("nNN", $_);
+# Write space_id + page_no = 5 to first doublewrite page
+sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, $page_size)==$page_size||die "Cannot read doublewrite\n";
+sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n";
+substr ($_, 38, 10) = pack("nNN", 1, $tspace_id, 5);
+# Overwrite both checksum fields with the innodb_checksum_algorithm=none value (0xdeadbeef)
+substr ($_, 0, 4) = pack("N", 0xdeadbeef);
+substr ($_, $page_size - 8, 4) = pack("N", 0xdeadbeef);
+syswrite(FILE, $_, $page_size)==$page_size||die;
+close(FILE);
+exit 0;
+EOF
+
+--enable_reconnect
+# Write file to make mysql-test-run.pl start up the server again
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--error 1
+--source include/wait_until_connected_again.inc
+
+--error 1932
+SELECT * FROM t1;
+
+--source include/shutdown_mysqld.inc
+
+--let SEARCH_RANGE = 10000000
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err
+--let SEARCH_PATTERN=buffer because it was written in reduced-doublewrite mode
+--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/
+--source include/search_pattern_in_file.inc
+--let SEARCH_PATTERN=Database page corruption on disk or a failed file read of tables
+--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/
+--source include/search_pattern_in_file.inc
+
+--echo # Backup table and system tablespace BACK
+--move_file $MYSQLD_DATADIR/test/t1.ibd.backup $MYSQLD_DATADIR/test/t1.ibd
+--move_file $MYSQLD_DATADIR/ibdata1.backup $MYSQLD_DATADIR/ibdata1
+
+--source include/start_mysqld.inc
+
+CHECK TABLE t1;
+SELECT COUNT(*) FROM t1;
+
+SET GLOBAL innodb_doublewrite=1;
+CREATE TABLE t2(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb;
+start transaction;
+INSERT INTO t2(b) VALUES(repeat('#',200));
+INSERT INTO t2(b) VALUES(repeat('+',200));
+INSERT INTO t2(b) VALUES(repeat('/',200));
+INSERT INTO t2(b) VALUES(repeat('|',200));
+INSERT INTO t2(b) VALUES(repeat('\\',200));
+INSERT INTO t2(b) VALUES(repeat('-',200));
+INSERT INTO t2(b) VALUES(repeat('&',200));
+INSERT INTO t2(b) VALUES(repeat('%',200));
+INSERT INTO t2(b) VALUES(repeat('@',200));
+INSERT INTO t2(b) VALUES(repeat('?',200));
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+commit work;
+
+select space from information_schema.innodb_sys_tables where name = 'test/t2' into @space_id;
+let SPACE_ID=`select space from information_schema.innodb_sys_tables where name = 'test/t2'`;
+
+--echo # Ensure that dirty pages of table t2 is flushed.
+flush tables t2 for export;
+unlock tables;
+
+begin;
+insert into t2(b) values (repeat('_', 42));
+
+--source ../include/no_checkpoint_start.inc
+
+--echo # Make the first page dirty for table t2
+set global innodb_saved_page_number_debug = 0;
+set global innodb_fil_make_page_dirty_debug = @space_id;
+
+--echo # Ensure that dirty pages of table t2 are flushed.
+set global innodb_buf_flush_list_now = 1;
+
+--let CLEANUP_IF_CHECKPOINT=drop table t2;
+--source ../include/no_checkpoint_end.inc
+
+#
+# Copy page 5 of t2.ibd into the first doublewrite buffer page,
+# then corrupt page 5 in t2.ibd.
+#
+perl;
+use IO::Handle;
+my $fname= "$ENV{'MYSQLD_DATADIR'}test/t2.ibd";
+my $page_size = $ENV{INNODB_PAGE_SIZE};
+my $tspace_id = $ENV{SPACE_ID};
+my $page;
+my $page2;
+
+open(FILE, "+<", "$ENV{MYSQLD_DATADIR}ibdata1")||die "cannot open ibdata1\n";
+sysseek(FILE, 6 * $page_size - 190, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, 12) == 12||die "Unable to read TRX_SYS\n";
+my($magic,$d1,$d2)=unpack "NNN", $_;
+die "magic=$magic, $d1, $d2\n" unless $magic == 536853855 && $d2 >= $d1 + 64;
+my($offset)=$d1*$page_size;
+# Find and read the page type from first page in the doublewrite buffer
+sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, 4096)== 4096||die "Cannot read doublewrite\n";
+sysseek(FILE, $offset + 24, 0)||die "Unable to seek ibdata1\n";
+sysread(FILE, $_, 2) == 2||die "Unable to read FIL_PAGE_TYPE\n";
+my($pagetype)=unpack "n", $_;
+die "Not full doublewrite page page_type=$pagetype\n" unless $pagetype != 32124;
+sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n";
+# Read page 5 from t2.ibd
+open(FILE2, "+<", $fname) or die "Unable to open $fname\n";;
+binmode FILE2;
+sysseek(FILE2, 5 * $page_size, 0)||die "Unable to seek $fname\n";
+sysread(FILE2, $page2, $page_size)==$page_size||die "Cannot read page from $fname\n";
+# Write page 5 from t2.ibd to first doublewrite buffer page
+syswrite(FILE, $page2, $page_size)==$page_size||die "Cannot write doublewrite page to ibdata1\n";
+close(FILE);
+close(FILE2);
+# Corrupt page 5 from t2.ibd
+open(FILE, "+<", $fname) or die "Unable to open $fname\n";;
+binmode FILE;
+sysseek(FILE, 5 * $page_size + 50, 0)||die "Unable to seek $fname\n";
+print FILE pack("H*", "deadbeefdeadbeefdeadbeefdddddddddddffffffffffffeeeeeeeeeeeebbbbbbbbbbbb");
+close(FILE);
+exit 0;
+EOF
+
+--source include/start_mysqld.inc
+
+CHECK TABLE t1;
+CHECK TABLE t2;
+
+SELECT COUNT(*) FROM t1;
+SELECT COUNT(*) FROM t2;
+
+DROP TABLE t1;
+DROP TABLE t2;
+
+--let SEARCH_RANGE = 10000000
+--let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err
+--let SEARCH_PATTERN=Trying to recover page
+--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/
+--source include/search_pattern_in_file.inc
+--let SEARCH_PATTERN=Recovered page
+--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/
+--source include/search_pattern_in_file.inc
diff --git a/mysql-test/suite/innodb/r/doublewrite.result b/mysql-test/suite/innodb/r/doublewrite.result
index 61c81ee9dff..62421ddd312 100644
--- a/mysql-test/suite/innodb/r/doublewrite.result
+++ b/mysql-test/suite/innodb/r/doublewrite.result
@@ -6,7 +6,7 @@
SET GLOBAL innodb_fast_shutdown = 0;
show variables like 'innodb_doublewrite';
Variable_name Value
-innodb_doublewrite ON
+innodb_doublewrite 1
show variables like 'innodb_fil_make_page_dirty_debug';
Variable_name Value
innodb_fil_make_page_dirty_debug 0
diff --git a/mysql-test/suite/innodb/r/innodb_doublewrite.result b/mysql-test/suite/innodb/r/innodb_doublewrite.result
new file mode 100644
index 00000000000..8bd112f9dcd
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_doublewrite.result
@@ -0,0 +1,111 @@
+call mtr.add_suppression("InnoDB: Failed to set O_DIRECT on file.*");
+call mtr.add_suppression("InnoDB: Cannot recover page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] from the doublewrite buffer because it was written in reduced-doublewrite mode");
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace .*");
+call mtr.add_suppression("InnoDB: Failed to read file .* at offset .*: Page read from tablespace is corrupted.");
+call mtr.add_suppression("InnoDB: Table .* is corrupted. Please drop the table and recreate.");
+SET GLOBAL innodb_fast_shutdown = 0;
+SET GLOBAL innodb_doublewrite=2;
+show variables like 'innodb_doublewrite';
+Variable_name Value
+innodb_doublewrite 2
+show variables like 'innodb_fil_make_page_dirty_debug';
+Variable_name Value
+innodb_fil_make_page_dirty_debug 0
+show variables like 'innodb_saved_page_number_debug';
+Variable_name Value
+innodb_saved_page_number_debug 0
+CREATE TABLE t1(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb;
+start transaction;
+INSERT INTO t1(b) VALUES(repeat('#',200));
+INSERT INTO t1(b) VALUES(repeat('+',200));
+INSERT INTO t1(b) VALUES(repeat('/',200));
+INSERT INTO t1(b) VALUES(repeat('|',200));
+INSERT INTO t1(b) VALUES(repeat('\\',200));
+INSERT INTO t1(b) VALUES(repeat('-',200));
+INSERT INTO t1(b) VALUES(repeat('&',200));
+INSERT INTO t1(b) VALUES(repeat('%',200));
+INSERT INTO t1(b) VALUES(repeat('@',200));
+INSERT INTO t1(b) VALUES(repeat('?',200));
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+commit work;
+select space from information_schema.innodb_sys_tables where name = 'test/t1' into @space_id;
+# Ensure that dirty pages of table t1 is flushed.
+flush tables t1 for export;
+unlock tables;
+begin;
+insert into t1(b) values (repeat('_', 42));
+# Make the first page dirty for table t1
+set global innodb_saved_page_number_debug = 0;
+set global innodb_fil_make_page_dirty_debug = @space_id;
+# Ensure that dirty pages of table t1 are flushed.
+set global innodb_buf_flush_list_now = 1;
+# Kill the server
+# Backup table and system tablespace before corrupting
+SELECT * FROM t1;
+ERROR 42S02: Table 'test.t1' doesn't exist in engine
+ FOUND 1 /buffer because it was written in reduced-doublewrite mode/ in mysqld.1.err
+ FOUND 1 /Database page corruption on disk or a failed file read of tables/ in mysqld.1.err
+# Backup table and system tablespace BACK
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2560
+SET GLOBAL innodb_doublewrite=1;
+CREATE TABLE t2(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb;
+start transaction;
+INSERT INTO t2(b) VALUES(repeat('#',200));
+INSERT INTO t2(b) VALUES(repeat('+',200));
+INSERT INTO t2(b) VALUES(repeat('/',200));
+INSERT INTO t2(b) VALUES(repeat('|',200));
+INSERT INTO t2(b) VALUES(repeat('\\',200));
+INSERT INTO t2(b) VALUES(repeat('-',200));
+INSERT INTO t2(b) VALUES(repeat('&',200));
+INSERT INTO t2(b) VALUES(repeat('%',200));
+INSERT INTO t2(b) VALUES(repeat('@',200));
+INSERT INTO t2(b) VALUES(repeat('?',200));
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+commit work;
+select space from information_schema.innodb_sys_tables where name = 'test/t2' into @space_id;
+# Ensure that dirty pages of table t2 is flushed.
+flush tables t2 for export;
+unlock tables;
+begin;
+insert into t2(b) values (repeat('_', 42));
+# Make the first page dirty for table t2
+set global innodb_saved_page_number_debug = 0;
+set global innodb_fil_make_page_dirty_debug = @space_id;
+# Ensure that dirty pages of table t2 are flushed.
+set global innodb_buf_flush_list_now = 1;
+# Kill the server
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+CHECK TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 check status OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2560
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+2560
+DROP TABLE t1;
+DROP TABLE t2;
+ FOUND 1 /Trying to recover page/ in mysqld.1.err
+ FOUND 1 /Recovered page/ in mysqld.1.err
diff --git a/mysql-test/suite/innodb/r/innodb_doublewrite_odirect.result b/mysql-test/suite/innodb/r/innodb_doublewrite_odirect.result
new file mode 100644
index 00000000000..8bd112f9dcd
--- /dev/null
+++ b/mysql-test/suite/innodb/r/innodb_doublewrite_odirect.result
@@ -0,0 +1,111 @@
+call mtr.add_suppression("InnoDB: Failed to set O_DIRECT on file.*");
+call mtr.add_suppression("InnoDB: Cannot recover page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] from the doublewrite buffer because it was written in reduced-doublewrite mode");
+call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace .*");
+call mtr.add_suppression("InnoDB: Failed to read file .* at offset .*: Page read from tablespace is corrupted.");
+call mtr.add_suppression("InnoDB: Table .* is corrupted. Please drop the table and recreate.");
+SET GLOBAL innodb_fast_shutdown = 0;
+SET GLOBAL innodb_doublewrite=2;
+show variables like 'innodb_doublewrite';
+Variable_name Value
+innodb_doublewrite 2
+show variables like 'innodb_fil_make_page_dirty_debug';
+Variable_name Value
+innodb_fil_make_page_dirty_debug 0
+show variables like 'innodb_saved_page_number_debug';
+Variable_name Value
+innodb_saved_page_number_debug 0
+CREATE TABLE t1(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb;
+start transaction;
+INSERT INTO t1(b) VALUES(repeat('#',200));
+INSERT INTO t1(b) VALUES(repeat('+',200));
+INSERT INTO t1(b) VALUES(repeat('/',200));
+INSERT INTO t1(b) VALUES(repeat('|',200));
+INSERT INTO t1(b) VALUES(repeat('\\',200));
+INSERT INTO t1(b) VALUES(repeat('-',200));
+INSERT INTO t1(b) VALUES(repeat('&',200));
+INSERT INTO t1(b) VALUES(repeat('%',200));
+INSERT INTO t1(b) VALUES(repeat('@',200));
+INSERT INTO t1(b) VALUES(repeat('?',200));
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+INSERT INTO t1(b) SELECT b FROM t1;
+commit work;
+select space from information_schema.innodb_sys_tables where name = 'test/t1' into @space_id;
+# Ensure that dirty pages of table t1 is flushed.
+flush tables t1 for export;
+unlock tables;
+begin;
+insert into t1(b) values (repeat('_', 42));
+# Make the first page dirty for table t1
+set global innodb_saved_page_number_debug = 0;
+set global innodb_fil_make_page_dirty_debug = @space_id;
+# Ensure that dirty pages of table t1 are flushed.
+set global innodb_buf_flush_list_now = 1;
+# Kill the server
+# Backup table and system tablespace before corrupting
+SELECT * FROM t1;
+ERROR 42S02: Table 'test.t1' doesn't exist in engine
+ FOUND 1 /buffer because it was written in reduced-doublewrite mode/ in mysqld.1.err
+ FOUND 1 /Database page corruption on disk or a failed file read of tables/ in mysqld.1.err
+# Backup table and system tablespace BACK
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2560
+SET GLOBAL innodb_doublewrite=1;
+CREATE TABLE t2(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb;
+start transaction;
+INSERT INTO t2(b) VALUES(repeat('#',200));
+INSERT INTO t2(b) VALUES(repeat('+',200));
+INSERT INTO t2(b) VALUES(repeat('/',200));
+INSERT INTO t2(b) VALUES(repeat('|',200));
+INSERT INTO t2(b) VALUES(repeat('\\',200));
+INSERT INTO t2(b) VALUES(repeat('-',200));
+INSERT INTO t2(b) VALUES(repeat('&',200));
+INSERT INTO t2(b) VALUES(repeat('%',200));
+INSERT INTO t2(b) VALUES(repeat('@',200));
+INSERT INTO t2(b) VALUES(repeat('?',200));
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+INSERT INTO t2(b) SELECT b FROM t2;
+commit work;
+select space from information_schema.innodb_sys_tables where name = 'test/t2' into @space_id;
+# Ensure that dirty pages of table t2 is flushed.
+flush tables t2 for export;
+unlock tables;
+begin;
+insert into t2(b) values (repeat('_', 42));
+# Make the first page dirty for table t2
+set global innodb_saved_page_number_debug = 0;
+set global innodb_fil_make_page_dirty_debug = @space_id;
+# Ensure that dirty pages of table t2 are flushed.
+set global innodb_buf_flush_list_now = 1;
+# Kill the server
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+CHECK TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 check status OK
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2560
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+2560
+DROP TABLE t1;
+DROP TABLE t2;
+ FOUND 1 /Trying to recover page/ in mysqld.1.err
+ FOUND 1 /Recovered page/ in mysqld.1.err
diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite-master.opt b/mysql-test/suite/innodb/t/innodb_doublewrite-master.opt
new file mode 100644
index 00000000000..82df307e376
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_doublewrite-master.opt
@@ -0,0 +1,7 @@
+--innodb-fast-shutdown=2
+--innodb-file-per-table
+--innodb_file_format='Barracuda'
+--innodb_flush_log_at_trx_commit=1
+--innodb_buffer_pool_load_at_startup=OFF
+
+
diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite.test b/mysql-test/suite/innodb/t/innodb_doublewrite.test
new file mode 100644
index 00000000000..ef6191b2449
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_doublewrite.test
@@ -0,0 +1 @@
+--source suite/innodb/include/innodb_doublewrite.inc
diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite_odirect-master.opt b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect-master.opt
new file mode 100644
index 00000000000..ee9cd08c8be
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect-master.opt
@@ -0,0 +1,6 @@
+--innodb-fast-shutdown=2
+--innodb-file-per-table
+--innodb_file_format='Barracuda'
+--innodb_flush_log_at_trx_commit=1
+--innodb-flush-method=O_DIRECT
+--innodb_buffer_pool_load_at_startup=OFF
diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite_odirect.test b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect.test
new file mode 100644
index 00000000000..ef6191b2449
--- /dev/null
+++ b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect.test
@@ -0,0 +1 @@
+--source suite/innodb/include/innodb_doublewrite.inc
diff --git a/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result b/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result
index 4a5baf0aeda..641d8cf5cc8 100644
--- a/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result
@@ -4,18 +4,31 @@ COUNT(@@GLOBAL.innodb_doublewrite)
1
1 Expected
'#---------------------BS_STVARS_026_02----------------------#'
-SET @@GLOBAL.innodb_doublewrite=1;
-ERROR HY000: Variable 'innodb_doublewrite' is a read only variable
-Expected error 'Read only variable'
+SET @global_start_value = @@global.innodb_doublewrite;
+SELECT @global_start_value;
+@global_start_value
+1
+SET @@GLOBAL.innodb_doublewrite = 1 ;
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
COUNT(@@GLOBAL.innodb_doublewrite)
1
1 Expected
+SET @@GLOBAL.innodb_doublewrite = 2;
+SELECT @@GLOBAL.innodb_doublewrite;
+@@GLOBAL.innodb_doublewrite
+2
+SET @@GLOBAL.innodb_doublewrite = 0;
+Warnings:
+Warning 1210 innodb_doublewrite can not be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0.
+SELECT @@GLOBAL.innodb_doublewrite;
+@@GLOBAL.innodb_doublewrite
+2
+2 Expected
'#---------------------BS_STVARS_026_03----------------------#'
-SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
+SELECT @@GLOBAL.innodb_doublewrite = VARIABLE_VALUE
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_doublewrite';
-IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
+@@GLOBAL.innodb_doublewrite = VARIABLE_VALUE
1
1 Expected
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
@@ -41,13 +54,11 @@ COUNT(@@innodb_doublewrite)
SELECT COUNT(@@local.innodb_doublewrite);
ERROR HY000: Variable 'innodb_doublewrite' is a GLOBAL variable
Expected error 'Variable is a GLOBAL variable'
-SELECT COUNT(@@SESSION.innodb_doublewrite);
-ERROR HY000: Variable 'innodb_doublewrite' is a GLOBAL variable
-Expected error 'Variable is a GLOBAL variable'
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
COUNT(@@GLOBAL.innodb_doublewrite)
1
1 Expected
-SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite;
-ERROR 42S22: Unknown column 'innodb_doublewrite' in 'field list'
-Expected error 'Readonly variable'
+SET @@global.innodb_doublewrite = @global_start_value;
+SELECT @@global.innodb_doublewrite;
+@@global.innodb_doublewrite
+1
diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
index 7402b84dc96..516975f1675 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
@@ -918,18 +918,18 @@ READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_DOUBLEWRITE
SESSION_VALUE NULL
-GLOBAL_VALUE ON
+GLOBAL_VALUE 1
GLOBAL_VALUE_ORIGIN COMPILE-TIME
-DEFAULT_VALUE ON
+DEFAULT_VALUE 1
VARIABLE_SCOPE GLOBAL
-VARIABLE_TYPE BOOLEAN
-VARIABLE_COMMENT Enable InnoDB doublewrite buffer (enabled by default). Disable with --skip-innodb-doublewrite.
-NUMERIC_MIN_VALUE NULL
-NUMERIC_MAX_VALUE NULL
-NUMERIC_BLOCK_SIZE NULL
-ENUM_VALUE_LIST OFF,ON
-READ_ONLY YES
-COMMAND_LINE_ARGUMENT NONE
+VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_COMMENT 0=Disable InnoDB doublewrite buffer.1=Enable full doublewrite mode (default).2=Enable reduced doublewrite mode.
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 2
+NUMERIC_BLOCK_SIZE 0
+ENUM_VALUE_LIST NULL
+READ_ONLY NO
+COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_DOUBLEWRITE_BATCH_SIZE
SESSION_VALUE NULL
GLOBAL_VALUE 120
diff --git a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test
index 1ae10d0f7cf..4827aafccde 100644
--- a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test
+++ b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test
@@ -37,36 +37,40 @@ SELECT COUNT(@@GLOBAL.innodb_doublewrite);
# Check if Value can set #
####################################################################
---error ER_INCORRECT_GLOBAL_LOCAL_VAR
-SET @@GLOBAL.innodb_doublewrite=1;
---echo Expected error 'Read only variable'
+SET @global_start_value = @@global.innodb_doublewrite;
+SELECT @global_start_value;
+
+SET @@GLOBAL.innodb_doublewrite = 1 ;
+
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
--echo 1 Expected
+SET @@GLOBAL.innodb_doublewrite = 2;
+SELECT @@GLOBAL.innodb_doublewrite;
+
+SET @@GLOBAL.innodb_doublewrite = 0;
+SELECT @@GLOBAL.innodb_doublewrite;
+--echo 2 Expected
--echo '#---------------------BS_STVARS_026_03----------------------#'
#################################################################
# Check if the value in GLOBAL Table matches value in variable #
#################################################################
---disable_warnings
-SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
+SELECT @@GLOBAL.innodb_doublewrite = VARIABLE_VALUE
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_doublewrite';
---enable_warnings
--echo 1 Expected
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
--echo 1 Expected
---disable_warnings
SELECT COUNT(VARIABLE_VALUE)
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_doublewrite';
---enable_warnings
--echo 1 Expected
@@ -92,15 +96,8 @@ SELECT COUNT(@@innodb_doublewrite);
SELECT COUNT(@@local.innodb_doublewrite);
--echo Expected error 'Variable is a GLOBAL variable'
---Error ER_INCORRECT_GLOBAL_LOCAL_VAR
-SELECT COUNT(@@SESSION.innodb_doublewrite);
---echo Expected error 'Variable is a GLOBAL variable'
-
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
--echo 1 Expected
---Error ER_BAD_FIELD_ERROR
-SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite;
---echo Expected error 'Readonly variable'
-
-
+SET @@global.innodb_doublewrite = @global_start_value;
+SELECT @@global.innodb_doublewrite;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 61006c8d89d..706011eebfb 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -5994,6 +5994,18 @@ database_corrupted:
" You can use CHECK TABLE to scan"
" your table for corruption. "
<< FORCE_RECOVERY_MSG;
+
+ /* Remove the page that is corrupted when
+ recovering. */
+ if (recv_recovery_on) {
+ ib::info() << "Removing the corrupted page "
+ << bpage->id << " in tablepace "
+ << space->name << " from recovered pages.";
+ mutex_enter(&recv_sys->mutex);
+ ut_ad(recv_sys->n_addrs > 0);
+ recv_sys->n_addrs--;
+ mutex_exit(&recv_sys->mutex);
+ }
}
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 17b2229f1da..61151e4bf4b 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -152,14 +152,28 @@ buf_dblwr_init(
ut_zalloc_nokey(buf_size * sizeof(bool)));
buf_dblwr->write_buf_unaligned = static_cast<byte*>(
- ut_malloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE));
+ ut_zalloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE));
buf_dblwr->write_buf = static_cast<byte*>(
ut_align(buf_dblwr->write_buf_unaligned,
UNIV_PAGE_SIZE));
+ buf_dblwr->header_unaligned = static_cast<byte*>(
+ ut_zalloc_nokey(2 * UNIV_PAGE_SIZE));
+
+ buf_dblwr->header = static_cast<byte*>(
+ ut_align(buf_dblwr->header_unaligned,
+ UNIV_PAGE_SIZE));
+
buf_dblwr->buf_block_arr = static_cast<buf_page_t**>(
ut_zalloc_nokey(buf_size * sizeof(void*)));
+
+ /* Write the page number and the page type to the doublewrite
+ * header in case it gets used. */
+ mach_write_to_4(buf_dblwr->header + FIL_PAGE_OFFSET,
+ buf_dblwr->block1);
+ mach_write_to_2(buf_dblwr->header + FIL_PAGE_TYPE,
+ FIL_PAGE_TYPE_DBLWR_HEADER);
}
/** Create the doublewrite buffer if the doublewrite buffer header
@@ -342,6 +356,73 @@ too_small:
goto start_again;
}
+/***************************************************************//**
+Overwrites both halves of the doublewrite buffer on disk with empty pages.
+This function must be called before the first double-write batch flush after
+the doublewrite mode is changed by the user, so that copies written in the
+previous mode cannot be picked up by a later recovery.
+The pages are written with synchronous fil_io() calls; no explicit flush is
+issued here. NOTE(review): the calling batch appears to flush the system
+tablespace once it has written the doublewrite data - confirm that this is
+sufficient for the reset pages as well.
+This function is only called from buf_dblwr_flush_buffered_writes(), while
+it is holding buf_dblwr->mutex, so this function need not be thread-safe. */
+static
+void
+buf_dblwr_reset(
+	ulint	doublewrite_mode)	/*!< in: the new doublewrite mode;
+					must be nonzero. In mode 2 the first
+					page is written as an empty
+					doublewrite header page. */
+{
+	ut_a(doublewrite_mode);
+
+	/* One extra page is allocated so that a page-aligned run of
+	TRX_SYS_DOUBLEWRITE_BLOCK_SIZE pages fits inside the allocation. */
+	void*		page_unaligned = ut_zalloc_nokey(
+		(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + 1) * UNIV_PAGE_SIZE);
+	byte* const	first_page = page_align(
+		static_cast<byte*>(page_unaligned) + UNIV_PAGE_SIZE);
+	byte*		page = first_page;
+	ulint		i = 0;
+
+	buf_dblwr_being_created = TRUE;
+
+	/* Reset the first half of doublewrite buffer on disk.
+	 * We handle the first block separately because it determines whether
+	 * the last flush was done in the reduced-doublewrite mode.
+	 */
+	if (doublewrite_mode == 2) {
+		/* Write an empty header page */
+		i = 1;
+		memcpy(page, buf_dblwr->header, FIL_PAGE_DATA);
+		buf_flush_init_for_writing(NULL, page, NULL, 0);
+		ut_ad(!buf_page_is_corrupted(FALSE, page, univ_page_size, NULL));
+		page += UNIV_PAGE_SIZE;
+	}
+
+	for (; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ++i) {
+		mach_write_to_4(page + FIL_PAGE_OFFSET, buf_dblwr->block1 + i);
+		buf_flush_init_for_writing(NULL, page, NULL, 0);
+		page += UNIV_PAGE_SIZE;
+	}
+
+	IORequest	write_request(IORequest::WRITE);
+	const page_id_t	page_id(TRX_SYS_SPACE, buf_dblwr->block1);
+
+	fil_io(write_request, true, page_id, univ_page_size,
+	       0,
+	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+	       static_cast<void*>(first_page), NULL);
+
+	/* Reset the second half of doublewrite buffer on disk. The same
+	memory is reused; only the page numbers and checksums are rewritten. */
+	page = first_page;
+
+	for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ++i) {
+		mach_write_to_4(page + FIL_PAGE_OFFSET, buf_dblwr->block2 + i);
+		buf_flush_init_for_writing(NULL, page, NULL, 0);
+		page += UNIV_PAGE_SIZE;
+	}
+
+	const page_id_t	page_id2(TRX_SYS_SPACE, buf_dblwr->block2);
+
+	fil_io(write_request, true, page_id2, univ_page_size, 0,
+	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+	       static_cast<void*>(first_page), NULL);
+
+	/* The reset is complete: clear the flag again so that it does not
+	stay set for the remaining lifetime of the server. */
+	buf_dblwr_being_created = FALSE;
+
+	ut_free(page_unaligned);
+}
+
/**
At database startup initializes the doublewrite buffer memory structure if
we already have a doublewrite buffer created in the data files. If we are
@@ -354,7 +435,8 @@ recovery, this function loads the pages from double write buffer into memory.
dberr_t
buf_dblwr_init_or_load_pages(
pfs_os_file_t file,
- const char* path)
+ const char* path,
+ bool load_corrupt_pages)
{
byte* buf;
byte* page;
@@ -366,6 +448,7 @@ buf_dblwr_init_or_load_pages(
byte* unaligned_read_buf;
ibool reset_space_ids = FALSE;
recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
+ bool header_found = false;
/* We do the file i/o past the buffer pool */
@@ -469,9 +552,57 @@ buf_dblwr_init_or_load_pages(
page = buf;
- for (ulint i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
+ /* First check if the first page is of type FIL_PAGE_TYPE_DBLWR_HEADER.
+ * If so, the doublewrite buffer was last written in reduced
+ * doublewrite mode (innodb_doublewrite=2).
+ */
+ if (fil_page_get_type(page) == FIL_PAGE_TYPE_DBLWR_HEADER) {
+ header_found = TRUE;
+ }
+
+ if (header_found && load_corrupt_pages) {
+ byte* ptr = page + FIL_PAGE_DATA;
+ ulint num_pages;
+ const page_size_t& page_size = page_size_t(BUF_DBLWR_HEADER_SIZE, BUF_DBLWR_HEADER_SIZE, true);
+
+ ut_a(!reset_space_ids);
+
+ if (buf_page_is_corrupted(
+ FALSE, page, page_size, NULL)) {
+ ib::error()
+ << "InnoDB: The first block of the doublewrite "
+ << "buffer is corrupt.";
+ buf_page_print(
+ page,
+ page_size,
+ BUF_PAGE_PRINT_NO_CRASH);
+ ut_error;
+ }
+
+ num_pages = mach_read_from_2(ptr);
+ ptr += 2;
+
+ for (ulint i = 0; i < num_pages; ++i) {
+ ulint space_id = mach_read_from_4(ptr);
+ ptr += 4;
+ ulint page_no = mach_read_from_4(ptr);
+ ptr += 4;
+ recv_dblwr.add(NULL, space_id, page_no);
+ }
+ }
+
+ if (header_found) {
+ page += univ_page_size.physical();
+ }
+
+ /* We go through all of the pages in the doublewrite buffer even if
+ * we found the header page.
+ */
+ for (ulint i = (header_found ? 1 : 0);
+ i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
if (reset_space_ids) {
ulint source_page_no;
+ ut_a(!header_found);
space_id = 0;
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
@@ -493,6 +624,7 @@ buf_dblwr_init_or_load_pages(
write_request, path, file, page,
source_page_no * UNIV_PAGE_SIZE,
UNIV_PAGE_SIZE);
+
if (err != DB_SUCCESS) {
ib::error()
@@ -504,10 +636,15 @@ buf_dblwr_init_or_load_pages(
return(err);
}
- } else if (memcmp(field_ref_zero, page + FIL_PAGE_LSN, 8)) {
+ } else if (load_corrupt_pages
+ && memcmp(field_ref_zero, page + FIL_PAGE_LSN, 8)) {
/* Each valid page header must contain
a nonzero FIL_PAGE_LSN field. */
- recv_dblwr.add(page);
+ ulint space_id =
+ mach_read_from_4(page + FIL_PAGE_SPACE_ID);
+ ulint page_no =
+ mach_read_from_4(page + FIL_PAGE_OFFSET);
+ recv_dblwr.add(page, space_id, page_no);
}
page += univ_page_size.physical();
@@ -529,7 +666,7 @@ buf_dblwr_process()
ulint page_no_dblwr = 0;
byte* read_buf;
byte* unaligned_read_buf;
- recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
+ std::list<recv_dblwr_item_t, ut_allocator<recv_dblwr_item_t> >& dblwr_pages = recv_sys->dblwr.pages;
if (!buf_dblwr) {
return;
@@ -541,12 +678,11 @@ buf_dblwr_process()
read_buf = static_cast<byte*>(
ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
- for (recv_dblwr_t::list::iterator i = recv_dblwr.pages.begin();
- i != recv_dblwr.pages.end();
- ++i, ++page_no_dblwr) {
- byte* page = *i;
- ulint space_id = page_get_space_id(page);
- fil_space_t* space = fil_space_get(space_id);
+ for (std::list<recv_dblwr_item_t>::iterator i = dblwr_pages.begin();
+ i != dblwr_pages.end(); ++i, ++page_no_dblwr ) {
+ byte* page = const_cast<byte*>(i->page);
+ fil_space_t* space = fil_space_get(i->space_id);
+ ulint space_id = i->space_id;
if (space == NULL) {
/* Maybe we have dropped the tablespace
@@ -556,7 +692,7 @@ buf_dblwr_process()
fil_space_open_if_needed(space);
- const ulint page_no = page_get_page_no(page);
+ const ulint page_no = i->page_no;
const page_id_t page_id(space_id, page_no);
if (page_no >= space->size) {
@@ -575,7 +711,6 @@ buf_dblwr_process()
}
const page_size_t page_size(space->flags);
- ut_ad(!buf_page_is_zeroes(page, page_size));
/* We want to ensure that for partial reads the
unread portion of the page is NUL. */
@@ -632,6 +767,21 @@ buf_dblwr_process()
<< " from the doublewrite buffer.";
}
+ if (!page) {
+ /* Theoretically we could have another good
+ copy for this page in the doublewrite
+ buffer. If not, we will report a fatal error
+ for a corrupted page somewhere else if that
+ page was truly needed. */
+
+ ib::warn() << "Cannot recover page " << page_id
+ << " from the doublewrite buffer"
+ << " because it was written in reduced-doublewrite mode";
+ continue;
+ }
+
+ ut_ad(!buf_page_is_zeroes(page, page_size));
+
/* Next, validate the doublewrite page. */
if (fil_page_is_compressed_encrypted(page) ||
fil_page_is_compressed(page)) {
@@ -684,7 +834,7 @@ buf_dblwr_process()
<< " from the doublewrite buffer.";
}
- recv_dblwr.pages.clear();
+ dblwr_pages.clear();
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
ut_free(unaligned_read_buf);
@@ -704,6 +854,9 @@ buf_dblwr_free()
os_event_destroy(buf_dblwr->s_event);
ut_free(buf_dblwr->write_buf_unaligned);
buf_dblwr->write_buf_unaligned = NULL;
+ ut_free(buf_dblwr->header_unaligned);
+ buf_dblwr->header_unaligned = NULL;
+ buf_dblwr->header = NULL;
ut_free(buf_dblwr->buf_block_arr);
buf_dblwr->buf_block_arr = NULL;
@@ -953,8 +1106,10 @@ buf_dblwr_flush_buffered_writes()
byte* write_buf;
ulint first_free;
ulint len;
+ byte* header_ptr;
+ ulong use_doublewrite_buf = srv_use_doublewrite_buf;
- if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
+ if (!use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */
buf_dblwr_sync_datafiles();
return;
@@ -999,6 +1154,31 @@ try_again:
start another batch of flushing. */
buf_dblwr->batch_running = true;
first_free = buf_dblwr->first_free;
+ /* Reset the doublewrite buffer if srv_doublewrite_reset is set.
+ * This protects against the following scenario:
+ * 1- server starts with full(=1) doublewrite mode and writes a bunch
+ * of pages to the doublewrite buffer.
+ * 2- user changes doublewrite mode from full(=1) to reduced(=2).
+ * 3- server runs for a long time in the reduced doublewrite mode so
+ * that the copies that were written to the doublewrite buffer in step
+ * 1 become stale.
+ * 4- some of the non-doublewrite pages on disk whose copies in the
+ * doublewrite buffer became stale get corrupted because of a hardware
+ * or a software failure.
+ * 5- server crashes. During recovery InnoDB processes pages both
+ * in the doublewrite header and the following full pages.
+ * 6- The stale copies in the doublewrite buffer are used to restore
+ * corrupt non-doublewrite pages on disk. Now the stale data will be
+ * served when these pages are accessed.
+ * This is a rare case because it needs the corruption to happen to one
+ * of the pages written to the doublewrite buffer in full mode. We
+ * nevertheless protect against this case by resetting the doublewrite
+ * buffer on disk, when the doublewrite mode changes.
+ */
+ if (srv_doublewrite_reset) {
+ buf_dblwr_reset(use_doublewrite_buf);
+ srv_doublewrite_reset = FALSE;
+ }
/* Now safe to release the mutex. Note that though no other
thread is allowed to post to the doublewrite batch flushing
@@ -1007,14 +1187,23 @@ try_again:
mutex_exit(&buf_dblwr->mutex);
write_buf = buf_dblwr->write_buf;
+ header_ptr = buf_dblwr->header + FIL_PAGE_DATA;
+ memset(header_ptr, 0, BUF_DBLWR_HEADER_SIZE - FIL_PAGE_DATA);
+ mach_write_to_2(header_ptr, buf_dblwr->first_free);
+ header_ptr += 2;
for (ulint len2 = 0, i = 0;
i < buf_dblwr->first_free;
len2 += UNIV_PAGE_SIZE, i++) {
- const buf_block_t* block;
-
+ const buf_block_t* block;
block = (buf_block_t*) buf_dblwr->buf_block_arr[i];
+ const buf_page_t* page = &block->page;
+
+ mach_write_to_4(header_ptr, page->id.space());
+ header_ptr += 4;
+ mach_write_to_4(header_ptr, page->id.page_no());
+ header_ptr += 4;
if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
|| block->page.zip.data) {
@@ -1033,14 +1222,30 @@ try_again:
}
/* Write out the first block of the doublewrite buffer */
- len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
- buf_dblwr->first_free) * UNIV_PAGE_SIZE;
+ if (use_doublewrite_buf == 2) {
+ ib_uint32_t checksum = page_zip_calc_checksum(
+ buf_dblwr->header, BUF_DBLWR_HEADER_SIZE,
+ static_cast<srv_checksum_algorithm_t>(
+ srv_checksum_algorithm));
+
+ const page_size_t& page_size = page_size_t(BUF_DBLWR_HEADER_SIZE, BUF_DBLWR_HEADER_SIZE, true);
+ mach_write_to_4(buf_dblwr->header + FIL_PAGE_SPACE_OR_CHKSUM,
+ checksum);
+ len = BUF_DBLWR_HEADER_SIZE;
+ write_buf = buf_dblwr->header;
+ ut_ad(!buf_page_is_corrupted(FALSE, write_buf, page_size, NULL));
+ } else {
+ len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
+ buf_dblwr->first_free) * UNIV_PAGE_SIZE;
+ }
fil_io(IORequestWrite, true,
page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size,
0, len, (void*) write_buf, NULL);
- if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+
+ if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+ || use_doublewrite_buf == 2) {
/* No unwritten pages in the second block. */
goto flush;
}
@@ -1058,7 +1263,12 @@ try_again:
flush:
/* increment the doublewrite flushed pages counter */
- srv_stats.dblwr_pages_written.add(buf_dblwr->first_free);
+ if (use_doublewrite_buf == 1) {
+ srv_stats.dblwr_pages_written.add(buf_dblwr->first_free);
+ } else {
+ srv_stats.dblwr_pages_written.inc();
+ }
+
srv_stats.dblwr_writes.inc();
/* Now flush the doublewrite buffer data to disk */
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
index e4bb11c9a22..aca84b9cfd4 100644
--- a/storage/innobase/fsp/fsp0sysspace.cc
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -571,7 +571,7 @@ SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn)
ut_a(it->order() == 0);
- buf_dblwr_init_or_load_pages(it->handle(), it->filepath());
+ buf_dblwr_init_or_load_pages(it->handle(), it->filepath(), true);
/* Check the contents of the first page of the
first datafile. */
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 68b69e94681..fc42d016698 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -254,7 +254,6 @@ values */
static my_bool innobase_file_format_check;
static my_bool innobase_use_atomic_writes;
static my_bool innobase_use_fallocate;
-static my_bool innobase_use_doublewrite;
static my_bool innobase_use_checksums;
static my_bool innobase_locks_unsafe_for_binlog;
static my_bool innobase_rollback_on_timeout;
@@ -4279,8 +4278,6 @@ innobase_change_buffering_inited_ok:
srv_n_read_io_threads = (ulint) innobase_read_io_threads;
srv_n_write_io_threads = (ulint) innobase_write_io_threads;
- srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
-
if (!innobase_use_checksums) {
ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED."
" This option may be removed in future releases. You"
@@ -18086,6 +18083,38 @@ innodb_io_capacity_max_update(
}
/****************************************************************//**
+Update callback for the system variable innodb_doublewrite, invoked with
+the "saved" value. Switching between modes 1 and 2 takes effect at once and
+requests a reset of the doublewrite buffer; any change to or from 0 is
+refused with a warning. This function is registered as a callback with
+MySQL. */
+static
+void
+innodb_doublewrite_update(
+/*======================*/
+	THD*				thd,	/*!< in: thread handle, used
+						for pushing the warning */
+	struct st_mysql_sys_var*	var,	/*!< in: pointer to
+						system variable (unused) */
+	void*				var_ptr,/*!< out: not written by
+						this function */
+	const void*			save)	/*!< in: immediate result
+						from check function */
+{
+	const ulong	requested = *static_cast<const ulong*>(save);
+
+	/* Neither enabling nor disabling the doublewrite buffer is
+	possible at runtime. */
+	if (requested == 0 || srv_use_doublewrite_buf == 0) {
+		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+				    ER_WRONG_ARGUMENTS,
+				    "innodb_doublewrite can not be "
+				    "dynamically changed to or from 0. "
+				    "Do a clean shutdown if you want to "
+				    "change it from or to 0.");
+		return;
+	}
+
+	ut_a(requested == 1 || requested == 2);
+
+	if (requested == srv_use_doublewrite_buf) {
+		/* Same mode as before: nothing to do. */
+		return;
+	}
+
+	srv_use_doublewrite_buf = requested;
+	/* Ask for the on-disk doublewrite buffer to be reset. */
+	srv_doublewrite_reset = 1;
+}
+
+/****************************************************************//**
Update the system variable innodb_io_capacity using the "saved"
value. This function is registered as a callback with MySQL. */
static
@@ -20530,11 +20559,12 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
"The common part for InnoDB table spaces.",
NULL, NULL, NULL);
-static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB doublewrite buffer (enabled by default)."
- " Disable with --skip-innodb-doublewrite.",
- NULL, NULL, TRUE);
+/* innodb_doublewrite: numeric doublewrite mode stored directly in
+srv_use_doublewrite_buf. Runtime changes are routed through
+innodb_doublewrite_update(). NOTE(review): the trailing arguments look like
+default=1, min=0, max=2, block size=0 - confirm against the
+MYSQL_SYSVAR_ULONG macro. */
+static MYSQL_SYSVAR_ULONG(doublewrite, srv_use_doublewrite_buf,
+ PLUGIN_VAR_OPCMDARG,
+ "0=Disable InnoDB doublewrite buffer."
+ "1=Enable full doublewrite mode (default)."
+ "2=Enable reduced doublewrite mode.",
+ NULL, innodb_doublewrite_update, 1, 0, 2, 0);
static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 8f04f9d15ee..87fa470f897 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -112,6 +112,7 @@ static buf_page_desc_t i_s_page_type[] = {
{"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
{"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED},
{"PAGE COMPRESSED AND ENCRYPTED", FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED},
+ {"DOUBLEWRITE HEADER", FIL_PAGE_TYPE_DBLWR_HEADER},
};
/** This structure defines information we will fetch from pages
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 598609e2be4..f560429feb9 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -37,6 +37,9 @@ Created 2011/12/19 Inaam Rana
extern buf_dblwr_t* buf_dblwr;
/** Set to TRUE when the doublewrite buffer is being created */
extern ibool buf_dblwr_being_created;
+/** The size of the doublewrite header page when the reduced-doublewrite mode
+is used. */
+#define BUF_DBLWR_HEADER_SIZE 4096
/** Create the doublewrite buffer if the doublewrite buffer header
is not present in the TRX_SYS page.
@@ -59,7 +62,8 @@ recovery, this function loads the pages from double write buffer into memory.
dberr_t
buf_dblwr_init_or_load_pages(
pfs_os_file_t file,
- const char* path);
+ const char* path,
+ bool load_corrupt_pages);
/** Process and remove the double write buffer pages for all tablespaces. */
void
@@ -157,6 +161,11 @@ struct buf_dblwr_t{
buf_page_t** buf_block_arr;/*!< array to store pointers to
the buffer blocks which have been
cached to write_buf */
+ byte* header;/*!< write buffer used for writing out the
+ doublewrite header for reduced doublewrite
+ mode (innodb_doublewrite=2) */
+ byte* header_unaligned;/*!< pointer to header,
+ but unaligned */
};
#endif
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index bf231565657..87f94c0234e 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -392,6 +392,10 @@ extern fil_addr_t fil_addr_null;
#define FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED 37401 /*!< Page is compressed and
then encrypted */
#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */
+/** In reduced-doublewrite mode, the first page of the doublewrite buffer
+holds the space ids and the page numbers of the most recently
+flushed pages. */
+#define FIL_PAGE_TYPE_DBLWR_HEADER 32124 /*!< Doublewrite header */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
#define FIL_PAGE_RTREE 17854 /*!< B-tree node */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic
index 9505cc0bd69..d3f26334f3b 100644
--- a/storage/innobase/include/fil0fil.ic
+++ b/storage/innobase/include/fil0fil.ic
@@ -35,6 +35,8 @@ fil_get_page_type_name(
ulint page_type) /*!< in: FIL_PAGE_TYPE */
{
switch(page_type) {
+ case FIL_PAGE_TYPE_DBLWR_HEADER:
+ return "PAGE_TYPE_DBLWR_HEADER";
case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
return "PAGE_COMPRESSED_ENRYPTED";
case FIL_PAGE_PAGE_COMPRESSED:
@@ -88,6 +90,7 @@ fil_page_type_validate(
/* Validate page type */
if (!((page_type == FIL_PAGE_PAGE_COMPRESSED ||
page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
+ page_type == FIL_PAGE_TYPE_DBLWR_HEADER ||
page_type == FIL_PAGE_INDEX ||
page_type == FIL_PAGE_RTREE ||
page_type == FIL_PAGE_UNDO_LOG ||
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 24ad9ae2a30..29b268fcff7 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -173,10 +173,20 @@ struct recv_addr_t{
hash_node_t addr_hash;/*!< hash node in the hash bucket chain */
};
+/** One entry of the doublewrite recovery list: the identity of a page and,
+when a full copy was read from the doublewrite buffer, its page frame. */
+struct recv_dblwr_item_t {
+ /** Page frame copied from the doublewrite buffer; NULL for entries
+ that were added from the reduced-doublewrite header, which records
+ only the page identity. */
+ const byte* page;
+ /** Tablespace id of the page */
+ ulint space_id;
+ /** Page number within the tablespace */
+ ulint page_no;
+};
+
struct recv_dblwr_t {
/** Add a page frame to the doublewrite recovery buffer. */
- void add(byte* page) {
- pages.push_back(page);
+ void add(const byte* page, ulint space_id, ulint page_no) {
+ recv_dblwr_item_t item;
+ item.page = page;
+ item.space_id = space_id;
+ item.page_no = page_no;
+ pages.push_back(item);
}
/** Find a doublewrite copy of a page.
@@ -186,10 +196,11 @@ struct recv_dblwr_t {
@retval NULL if no page was found */
const byte* find_page(ulint space_id, ulint page_no);
- typedef std::list<byte*, ut_allocator<byte*> > list;
+ std::list<recv_dblwr_item_t, ut_allocator<recv_dblwr_item_t> > pages;
- /** Recovered doublewrite buffer page frames */
- list pages;
+ void operator() () {
+ pages.clear();
+ }
};
/** Recovery system data structure */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 891f25f68f1..f88669bdd7d 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -486,7 +486,8 @@ extern my_bool srv_stats_include_delete_marked;
extern unsigned long long srv_stats_modified_counter;
extern my_bool srv_stats_sample_traditional;
-extern ibool srv_use_doublewrite_buf;
+extern ulong srv_use_doublewrite_buf;
+extern my_bool srv_doublewrite_reset;
extern ulong srv_doublewrite_batch_size;
extern ulong srv_checksum_algorithm;
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index dc8977e49c8..814d4e4804a 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -3500,14 +3500,14 @@ recv_dblwr_t::find_page(ulint space_id, ulint page_no)
{
typedef std::vector<const byte*, ut_allocator<const byte*> >
matches_t;
-
matches_t matches;
const byte* result = 0;
- for (list::iterator i = pages.begin(); i != pages.end(); ++i) {
- if (page_get_space_id(*i) == space_id
- && page_get_page_no(*i) == page_no) {
- matches.push_back(*i);
+ for (std::list<recv_dblwr_item_t>::iterator i = pages.begin(); i != pages.end(); ++i) {
+ if (i->page
+ && (page_get_space_id(i->page) == space_id)
+ && (page_get_page_no(i->page) == page_no)) {
+ matches.push_back(i->page);
}
}
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 2894be6b12c..3999f7bc90a 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -392,8 +392,8 @@ unsigned long long srv_stats_modified_counter;
based on number of configured pages */
my_bool srv_stats_sample_traditional;
-/** copy of innodb_doublewrite */
-ibool srv_use_doublewrite_buf;
+ulong srv_use_doublewrite_buf = 1;
+my_bool srv_doublewrite_reset = FALSE;
/** innodb_doublewrite_batch_size (a debug parameter) specifies the
number of pages to use in LRU and flush_list batch flushing.