diff options
23 files changed, 867 insertions, 84 deletions
diff --git a/mysql-test/suite/innodb/include/innodb_doublewrite.inc b/mysql-test/suite/innodb/include/innodb_doublewrite.inc new file mode 100644 index 00000000000..d537280d178 --- /dev/null +++ b/mysql-test/suite/innodb/include/innodb_doublewrite.inc @@ -0,0 +1,257 @@ +# Embedded server does not support crashing +--source include/not_embedded.inc +--source include/have_innodb.inc +--source include/innodb_page_size.inc +--source include/have_debug.inc + +call mtr.add_suppression("InnoDB: Failed to set O_DIRECT on file.*"); +call mtr.add_suppression("InnoDB: Cannot recover page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] from the doublewrite buffer because it was written in reduced-doublewrite mode"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace .*"); +call mtr.add_suppression("InnoDB: Failed to read file .* at offset .*: Page read from tablespace is corrupted."); +call mtr.add_suppression("InnoDB: Table .* is corrupted. 
Please drop the table and recreate."); + +# Slow shutdown and restart to make sure ibuf merge is finished +SET GLOBAL innodb_fast_shutdown = 0; + +let INNODB_PAGE_SIZE=`select @@innodb_page_size`; +let MYSQLD_DATADIR=`select @@datadir`; + +SET GLOBAL innodb_doublewrite=2; + +show variables like 'innodb_doublewrite'; +show variables like 'innodb_fil_make_page_dirty_debug'; +show variables like 'innodb_saved_page_number_debug'; + +CREATE TABLE t1(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb; +start transaction; +INSERT INTO t1(b) VALUES(repeat('#',200)); +INSERT INTO t1(b) VALUES(repeat('+',200)); +INSERT INTO t1(b) VALUES(repeat('/',200)); +INSERT INTO t1(b) VALUES(repeat('|',200)); +INSERT INTO t1(b) VALUES(repeat('\\',200)); +INSERT INTO t1(b) VALUES(repeat('-',200)); +INSERT INTO t1(b) VALUES(repeat('&',200)); +INSERT INTO t1(b) VALUES(repeat('%',200)); +INSERT INTO t1(b) VALUES(repeat('@',200)); +INSERT INTO t1(b) VALUES(repeat('?',200)); +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +commit work; + +select space from information_schema.innodb_sys_tables where name = 'test/t1' into @space_id; +let SPACE_ID=`select space from information_schema.innodb_sys_tables where name = 'test/t1'`; + +--echo # Ensure that dirty pages of table t1 is flushed. +flush tables t1 for export; +unlock tables; + +begin; +insert into t1(b) values (repeat('_', 42)); + +--source ../include/no_checkpoint_start.inc + +--echo # Make the first page dirty for table t1 +set global innodb_saved_page_number_debug = 0; +set global innodb_fil_make_page_dirty_debug = @space_id; + +--echo # Ensure that dirty pages of table t1 are flushed. 
+set global innodb_buf_flush_list_now = 1; + +--let CLEANUP_IF_CHECKPOINT=drop table t1; +--source ../include/no_checkpoint_end.inc + +--echo # Backup table and system tablespace before corrupting +--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t1.ibd.backup +--copy_file $MYSQLD_DATADIR/ibdata1 $MYSQLD_DATADIR/ibdata1.backup + +# +# Corrupt page 5 from table t1 and write page no 5 to first doublewrite +# buffer +# +perl; +use IO::Handle; +# Only core Perl modules are used so the test runs on a stock Perl installation. +my $fname= "$ENV{'MYSQLD_DATADIR'}test/t1.ibd"; +my $page_size = $ENV{INNODB_PAGE_SIZE}; +my $tspace_id = $ENV{SPACE_ID}; +my $page; + +open(FILE, "+<", $fname) or die "Unable to open $fname\n";; +binmode FILE; +sysseek(FILE, 5 * $page_size + 48, 0)||die "Unable to seek $fname\n"; +print FILE pack("H*", "deadbeefdeadbeefdeadbeefdeadbeef"); +close FILE or die "Unable to close $fname\n"; + +open(FILE, "+<", "$ENV{MYSQLD_DATADIR}ibdata1")||die "cannot open ibdata1\n"; +sysseek(FILE, 6 * $page_size - 190, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, 12) == 12||die "Unable to read TRX_SYS\n"; +my($magic,$d1,$d2)=unpack "NNN", $_; +die "magic=$magic, $d1, $d2\n" unless $magic == 536853855 && $d2 >= $d1 + 64; +my($offset)=$d1*$page_size; +# Find and read the page type from first page in the doublewrite buffer +sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, 4096)== 4096||die "Cannot read doublewrite\n"; +sysseek(FILE, $offset + 24, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, 2) == 2||die "Unable to read FIL_PAGE_TYPE\n"; +my($pagetype)=unpack "n", $_; +die "Not reduced doublewrite page page_type=$pagetype\n" unless $pagetype == 32124; +# Find and read the space_id + page_no from first doublewrite page +sysseek(FILE, $offset + 38, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, 10) == 10||die "Unable to read doublewrite buf\n"; +my($first_free,$space_id,$page_no)=unpack ("nNN", $_); +# Write space_id + page_no = 5 to first doublewrite page 
+sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, $page_size)==$page_size||die "Cannot read doublewrite\n"; +sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n"; +substr ($_, 38, 10) = pack("nNN", 1, $tspace_id, 5); +# Replace the innodb_checksum_algorithm=none checksum +substr ($_, 0, 4) = pack("N", 0xdeadbeef); +substr ($_, $page_size - 8, 4) = pack("N", 0xdeadbeef); +syswrite(FILE, $_, $page_size)==$page_size||die; +close(FILE); +exit 0; +EOF + +--enable_reconnect +# Write file to make mysql-test-run.pl start up the server again +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--error 1 +--source include/wait_until_connected_again.inc + +--error 1932 +SELECT * FROM t1; + +--source include/shutdown_mysqld.inc + +--let SEARCH_RANGE = 10000000 +--let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err +--let SEARCH_PATTERN=buffer because it was written in reduced-doublewrite mode +--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/ +--source include/search_pattern_in_file.inc +--let SEARCH_PATTERN=Database page corruption on disk or a failed file read of tables +--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/ +--source include/search_pattern_in_file.inc + +--echo # Backup table and system tablespace BACK +--move_file $MYSQLD_DATADIR/test/t1.ibd.backup $MYSQLD_DATADIR/test/t1.ibd +--move_file $MYSQLD_DATADIR/ibdata1.backup $MYSQLD_DATADIR/ibdata1 + +--source include/start_mysqld.inc + +CHECK TABLE t1; +SELECT COUNT(*) FROM t1; + +SET GLOBAL innodb_doublewrite=1; +CREATE TABLE t2(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb; +start transaction; +INSERT INTO t2(b) VALUES(repeat('#',200)); +INSERT INTO t2(b) VALUES(repeat('+',200)); +INSERT INTO t2(b) VALUES(repeat('/',200)); +INSERT INTO t2(b) VALUES(repeat('|',200)); +INSERT INTO t2(b) VALUES(repeat('\\',200)); +INSERT INTO t2(b) VALUES(repeat('-',200)); +INSERT INTO t2(b) VALUES(repeat('&',200)); +INSERT INTO t2(b) VALUES(repeat('%',200)); +INSERT INTO 
t2(b) VALUES(repeat('@',200)); +INSERT INTO t2(b) VALUES(repeat('?',200)); +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +commit work; + +select space from information_schema.innodb_sys_tables where name = 'test/t2' into @space_id; +let SPACE_ID=`select space from information_schema.innodb_sys_tables where name = 'test/t2'`; + +--echo # Ensure that dirty pages of table t2 is flushed. +flush tables t2 for export; +unlock tables; + +begin; +insert into t2(b) values (repeat('_', 42)); + +--source ../include/no_checkpoint_start.inc + +--echo # Make the first page dirty for table t2 +set global innodb_saved_page_number_debug = 0; +set global innodb_fil_make_page_dirty_debug = @space_id; + +--echo # Ensure that dirty pages of table t2 are flushed. +set global innodb_buf_flush_list_now = 1; + +--let CLEANUP_IF_CHECKPOINT=drop table t2; +--source ../include/no_checkpoint_end.inc + +# +# Write page 5 from t2.ibd to first doublewrite buffer page +# and then corrupt the page 5 from t2.ibd. 
+# +perl; +use IO::Handle; +my $fname= "$ENV{'MYSQLD_DATADIR'}test/t2.ibd"; +my $page_size = $ENV{INNODB_PAGE_SIZE}; +my $tspace_id = $ENV{SPACE_ID}; +my $page; +my $page2; + +open(FILE, "+<", "$ENV{MYSQLD_DATADIR}ibdata1")||die "cannot open ibdata1\n"; +sysseek(FILE, 6 * $page_size - 190, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, 12) == 12||die "Unable to read TRX_SYS\n"; +my($magic,$d1,$d2)=unpack "NNN", $_; +die "magic=$magic, $d1, $d2\n" unless $magic == 536853855 && $d2 >= $d1 + 64; +my($offset)=$d1*$page_size; +# Find and read the page type from first page in the doublewrite buffer +sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, 4096)== 4096||die "Cannot read doublewrite\n"; +sysseek(FILE, $offset + 24, 0)||die "Unable to seek ibdata1\n"; +sysread(FILE, $_, 2) == 2||die "Unable to read FIL_PAGE_TYPE\n"; +my($pagetype)=unpack "n", $_; +die "Not full doublewrite page page_type=$pagetype\n" unless $pagetype != 32124; +sysseek(FILE, $offset, 0)||die "Unable to seek ibdata1\n"; +# Read page 5 from t2.ibd +open(FILE2, "+<", $fname) or die "Unable to open $fname\n";; +binmode FILE2; +sysseek(FILE2, 5 * $page_size, 0)||die "Unable to seek $fname\n"; +sysread(FILE2, $page2, $page_size)==$page_size||die "Cannot read page from $fname\n"; +# Write page 5 from t2.ibd to first doublewrite buffer page +syswrite(FILE, $page2, $page_size)==$page_size||die "Cannot write doublewrite page to ibdata1\n"; +close(FILE); +close(FILE2); +# Corrupt page 5 from t2.ibd +open(FILE, "+<", $fname) or die "Unable to open $fname\n";; +binmode FILE; +sysseek(FILE, 5 * $page_size + 50, 0)||die "Unable to seek $fname\n"; +print FILE pack("H*", "deadbeefdeadbeefdeadbeefdddddddddddffffffffffffeeeeeeeeeeeebbbbbbbbbbbb"); +close(FILE); +exit 0; +EOF + +--source include/start_mysqld.inc + +CHECK TABLE t1; +CHECK TABLE t2; + +SELECT COUNT(*) FROM t1; +SELECT COUNT(*) FROM t2; + +DROP TABLE t1; +DROP TABLE t2; + +--let SEARCH_RANGE = 10000000 +--let 
SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err +--let SEARCH_PATTERN=Trying to recover page +--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/ +--source include/search_pattern_in_file.inc +--let SEARCH_PATTERN=Recovered page +--replace_regex /FOUND [1-9][0-9]*/ FOUND 1/ +--source include/search_pattern_in_file.inc diff --git a/mysql-test/suite/innodb/r/doublewrite.result b/mysql-test/suite/innodb/r/doublewrite.result index 61c81ee9dff..62421ddd312 100644 --- a/mysql-test/suite/innodb/r/doublewrite.result +++ b/mysql-test/suite/innodb/r/doublewrite.result @@ -6,7 +6,7 @@ SET GLOBAL innodb_fast_shutdown = 0; show variables like 'innodb_doublewrite'; Variable_name Value -innodb_doublewrite ON +innodb_doublewrite 1 show variables like 'innodb_fil_make_page_dirty_debug'; Variable_name Value innodb_fil_make_page_dirty_debug 0 diff --git a/mysql-test/suite/innodb/r/innodb_doublewrite.result b/mysql-test/suite/innodb/r/innodb_doublewrite.result new file mode 100644 index 00000000000..8bd112f9dcd --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_doublewrite.result @@ -0,0 +1,111 @@ +call mtr.add_suppression("InnoDB: Failed to set O_DIRECT on file.*"); +call mtr.add_suppression("InnoDB: Cannot recover page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] from the doublewrite buffer because it was written in reduced-doublewrite mode"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace .*"); +call mtr.add_suppression("InnoDB: Failed to read file .* at offset .*: Page read from tablespace is corrupted."); +call mtr.add_suppression("InnoDB: Table .* is corrupted. 
Please drop the table and recreate."); +SET GLOBAL innodb_fast_shutdown = 0; +SET GLOBAL innodb_doublewrite=2; +show variables like 'innodb_doublewrite'; +Variable_name Value +innodb_doublewrite 2 +show variables like 'innodb_fil_make_page_dirty_debug'; +Variable_name Value +innodb_fil_make_page_dirty_debug 0 +show variables like 'innodb_saved_page_number_debug'; +Variable_name Value +innodb_saved_page_number_debug 0 +CREATE TABLE t1(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb; +start transaction; +INSERT INTO t1(b) VALUES(repeat('#',200)); +INSERT INTO t1(b) VALUES(repeat('+',200)); +INSERT INTO t1(b) VALUES(repeat('/',200)); +INSERT INTO t1(b) VALUES(repeat('|',200)); +INSERT INTO t1(b) VALUES(repeat('\\',200)); +INSERT INTO t1(b) VALUES(repeat('-',200)); +INSERT INTO t1(b) VALUES(repeat('&',200)); +INSERT INTO t1(b) VALUES(repeat('%',200)); +INSERT INTO t1(b) VALUES(repeat('@',200)); +INSERT INTO t1(b) VALUES(repeat('?',200)); +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +commit work; +select space from information_schema.innodb_sys_tables where name = 'test/t1' into @space_id; +# Ensure that dirty pages of table t1 is flushed. +flush tables t1 for export; +unlock tables; +begin; +insert into t1(b) values (repeat('_', 42)); +# Make the first page dirty for table t1 +set global innodb_saved_page_number_debug = 0; +set global innodb_fil_make_page_dirty_debug = @space_id; +# Ensure that dirty pages of table t1 are flushed. 
+set global innodb_buf_flush_list_now = 1; +# Kill the server +# Backup table and system tablespace before corrupting +SELECT * FROM t1; +ERROR 42S02: Table 'test.t1' doesn't exist in engine + FOUND 1 /buffer because it was written in reduced-doublewrite mode/ in mysqld.1.err + FOUND 1 /Database page corruption on disk or a failed file read of tables/ in mysqld.1.err +# Backup table and system tablespace BACK +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT COUNT(*) FROM t1; +COUNT(*) +2560 +SET GLOBAL innodb_doublewrite=1; +CREATE TABLE t2(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb; +start transaction; +INSERT INTO t2(b) VALUES(repeat('#',200)); +INSERT INTO t2(b) VALUES(repeat('+',200)); +INSERT INTO t2(b) VALUES(repeat('/',200)); +INSERT INTO t2(b) VALUES(repeat('|',200)); +INSERT INTO t2(b) VALUES(repeat('\\',200)); +INSERT INTO t2(b) VALUES(repeat('-',200)); +INSERT INTO t2(b) VALUES(repeat('&',200)); +INSERT INTO t2(b) VALUES(repeat('%',200)); +INSERT INTO t2(b) VALUES(repeat('@',200)); +INSERT INTO t2(b) VALUES(repeat('?',200)); +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +commit work; +select space from information_schema.innodb_sys_tables where name = 'test/t2' into @space_id; +# Ensure that dirty pages of table t2 is flushed. +flush tables t2 for export; +unlock tables; +begin; +insert into t2(b) values (repeat('_', 42)); +# Make the first page dirty for table t2 +set global innodb_saved_page_number_debug = 0; +set global innodb_fil_make_page_dirty_debug = @space_id; +# Ensure that dirty pages of table t2 are flushed. 
+set global innodb_buf_flush_list_now = 1; +# Kill the server +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +CHECK TABLE t2; +Table Op Msg_type Msg_text +test.t2 check status OK +SELECT COUNT(*) FROM t1; +COUNT(*) +2560 +SELECT COUNT(*) FROM t2; +COUNT(*) +2560 +DROP TABLE t1; +DROP TABLE t2; + FOUND 1 /Trying to recover page/ in mysqld.1.err + FOUND 1 /Recovered page/ in mysqld.1.err diff --git a/mysql-test/suite/innodb/r/innodb_doublewrite_odirect.result b/mysql-test/suite/innodb/r/innodb_doublewrite_odirect.result new file mode 100644 index 00000000000..8bd112f9dcd --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_doublewrite_odirect.result @@ -0,0 +1,111 @@ +call mtr.add_suppression("InnoDB: Failed to set O_DIRECT on file.*"); +call mtr.add_suppression("InnoDB: Cannot recover page \\[page id: space=[1-9][0-9]*, page number=[1-9][0-9]*\\] from the doublewrite buffer because it was written in reduced-doublewrite mode"); +call mtr.add_suppression("InnoDB: Database page corruption on disk or a failed file read of tablespace .*"); +call mtr.add_suppression("InnoDB: Failed to read file .* at offset .*: Page read from tablespace is corrupted."); +call mtr.add_suppression("InnoDB: Table .* is corrupted. 
Please drop the table and recreate."); +SET GLOBAL innodb_fast_shutdown = 0; +SET GLOBAL innodb_doublewrite=2; +show variables like 'innodb_doublewrite'; +Variable_name Value +innodb_doublewrite 2 +show variables like 'innodb_fil_make_page_dirty_debug'; +Variable_name Value +innodb_fil_make_page_dirty_debug 0 +show variables like 'innodb_saved_page_number_debug'; +Variable_name Value +innodb_saved_page_number_debug 0 +CREATE TABLE t1(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb; +start transaction; +INSERT INTO t1(b) VALUES(repeat('#',200)); +INSERT INTO t1(b) VALUES(repeat('+',200)); +INSERT INTO t1(b) VALUES(repeat('/',200)); +INSERT INTO t1(b) VALUES(repeat('|',200)); +INSERT INTO t1(b) VALUES(repeat('\\',200)); +INSERT INTO t1(b) VALUES(repeat('-',200)); +INSERT INTO t1(b) VALUES(repeat('&',200)); +INSERT INTO t1(b) VALUES(repeat('%',200)); +INSERT INTO t1(b) VALUES(repeat('@',200)); +INSERT INTO t1(b) VALUES(repeat('?',200)); +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +INSERT INTO t1(b) SELECT b FROM t1; +commit work; +select space from information_schema.innodb_sys_tables where name = 'test/t1' into @space_id; +# Ensure that dirty pages of table t1 is flushed. +flush tables t1 for export; +unlock tables; +begin; +insert into t1(b) values (repeat('_', 42)); +# Make the first page dirty for table t1 +set global innodb_saved_page_number_debug = 0; +set global innodb_fil_make_page_dirty_debug = @space_id; +# Ensure that dirty pages of table t1 are flushed. 
+set global innodb_buf_flush_list_now = 1; +# Kill the server +# Backup table and system tablespace before corrupting +SELECT * FROM t1; +ERROR 42S02: Table 'test.t1' doesn't exist in engine + FOUND 1 /buffer because it was written in reduced-doublewrite mode/ in mysqld.1.err + FOUND 1 /Database page corruption on disk or a failed file read of tables/ in mysqld.1.err +# Backup table and system tablespace BACK +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +SELECT COUNT(*) FROM t1; +COUNT(*) +2560 +SET GLOBAL innodb_doublewrite=1; +CREATE TABLE t2(a INT PRIMARY KEY AUTO_INCREMENT, b char(255) default '') ENGINE=innodb; +start transaction; +INSERT INTO t2(b) VALUES(repeat('#',200)); +INSERT INTO t2(b) VALUES(repeat('+',200)); +INSERT INTO t2(b) VALUES(repeat('/',200)); +INSERT INTO t2(b) VALUES(repeat('|',200)); +INSERT INTO t2(b) VALUES(repeat('\\',200)); +INSERT INTO t2(b) VALUES(repeat('-',200)); +INSERT INTO t2(b) VALUES(repeat('&',200)); +INSERT INTO t2(b) VALUES(repeat('%',200)); +INSERT INTO t2(b) VALUES(repeat('@',200)); +INSERT INTO t2(b) VALUES(repeat('?',200)); +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +INSERT INTO t2(b) SELECT b FROM t2; +commit work; +select space from information_schema.innodb_sys_tables where name = 'test/t2' into @space_id; +# Ensure that dirty pages of table t2 is flushed. +flush tables t2 for export; +unlock tables; +begin; +insert into t2(b) values (repeat('_', 42)); +# Make the first page dirty for table t2 +set global innodb_saved_page_number_debug = 0; +set global innodb_fil_make_page_dirty_debug = @space_id; +# Ensure that dirty pages of table t2 are flushed. 
+set global innodb_buf_flush_list_now = 1; +# Kill the server +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +CHECK TABLE t2; +Table Op Msg_type Msg_text +test.t2 check status OK +SELECT COUNT(*) FROM t1; +COUNT(*) +2560 +SELECT COUNT(*) FROM t2; +COUNT(*) +2560 +DROP TABLE t1; +DROP TABLE t2; + FOUND 1 /Trying to recover page/ in mysqld.1.err + FOUND 1 /Recovered page/ in mysqld.1.err diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite-master.opt b/mysql-test/suite/innodb/t/innodb_doublewrite-master.opt new file mode 100644 index 00000000000..82df307e376 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_doublewrite-master.opt @@ -0,0 +1,7 @@ +--innodb-fast-shutdown=2 +--innodb-file-per-table +--innodb_file_format='Barracuda' +--innodb_flush_log_at_trx_commit=1 +--innodb_buffer_pool_load_at_startup=OFF + + diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite.test b/mysql-test/suite/innodb/t/innodb_doublewrite.test new file mode 100644 index 00000000000..ef6191b2449 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_doublewrite.test @@ -0,0 +1 @@ +--source suite/innodb/include/innodb_doublewrite.inc diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite_odirect-master.opt b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect-master.opt new file mode 100644 index 00000000000..ee9cd08c8be --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect-master.opt @@ -0,0 +1,6 @@ +--innodb-fast-shutdown=2 +--innodb-file-per-table +--innodb_file_format='Barracuda' +--innodb_flush_log_at_trx_commit=1 +--innodb-flush-method=O_DIRECT +--innodb_buffer_pool_load_at_startup=OFF diff --git a/mysql-test/suite/innodb/t/innodb_doublewrite_odirect.test b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect.test new file mode 100644 index 00000000000..ef6191b2449 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_doublewrite_odirect.test @@ -0,0 +1 @@ +--source suite/innodb/include/innodb_doublewrite.inc diff --git 
a/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result b/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result index 4a5baf0aeda..641d8cf5cc8 100644 --- a/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result @@ -4,18 +4,31 @@ COUNT(@@GLOBAL.innodb_doublewrite) 1 1 Expected '#---------------------BS_STVARS_026_02----------------------#' -SET @@GLOBAL.innodb_doublewrite=1; -ERROR HY000: Variable 'innodb_doublewrite' is a read only variable -Expected error 'Read only variable' +SET @global_start_value = @@global.innodb_doublewrite; +SELECT @global_start_value; +@global_start_value +1 +SET @@GLOBAL.innodb_doublewrite = 1 ; SELECT COUNT(@@GLOBAL.innodb_doublewrite); COUNT(@@GLOBAL.innodb_doublewrite) 1 1 Expected +SET @@GLOBAL.innodb_doublewrite = 2; +SELECT @@GLOBAL.innodb_doublewrite; +@@GLOBAL.innodb_doublewrite +2 +SET @@GLOBAL.innodb_doublewrite = 0; +Warnings: +Warning 1210 innodb_doublewrite can not be dynamically changed to or from 0. Do a clean shutdown if you want to change it from or to 0. 
+SELECT @@GLOBAL.innodb_doublewrite; +@@GLOBAL.innodb_doublewrite +2 +2 Expected '#---------------------BS_STVARS_026_03----------------------#' -SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE +SELECT @@GLOBAL.innodb_doublewrite = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_doublewrite'; -IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE +@@GLOBAL.innodb_doublewrite = VARIABLE_VALUE 1 1 Expected SELECT COUNT(@@GLOBAL.innodb_doublewrite); @@ -41,13 +54,11 @@ COUNT(@@innodb_doublewrite) SELECT COUNT(@@local.innodb_doublewrite); ERROR HY000: Variable 'innodb_doublewrite' is a GLOBAL variable Expected error 'Variable is a GLOBAL variable' -SELECT COUNT(@@SESSION.innodb_doublewrite); -ERROR HY000: Variable 'innodb_doublewrite' is a GLOBAL variable -Expected error 'Variable is a GLOBAL variable' SELECT COUNT(@@GLOBAL.innodb_doublewrite); COUNT(@@GLOBAL.innodb_doublewrite) 1 1 Expected -SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite; -ERROR 42S22: Unknown column 'innodb_doublewrite' in 'field list' -Expected error 'Readonly variable' +SET @@global.innodb_doublewrite = @global_start_value; +SELECT @@global.innodb_doublewrite; +@@global.innodb_doublewrite +1 diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 7402b84dc96..516975f1675 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -918,18 +918,18 @@ READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_DOUBLEWRITE SESSION_VALUE NULL -GLOBAL_VALUE ON +GLOBAL_VALUE 1 GLOBAL_VALUE_ORIGIN COMPILE-TIME -DEFAULT_VALUE ON +DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Enable InnoDB doublewrite buffer (enabled by default). Disable with --skip-innodb-doublewrite. 
-NUMERIC_MIN_VALUE NULL -NUMERIC_MAX_VALUE NULL -NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST OFF,ON -READ_ONLY YES -COMMAND_LINE_ARGUMENT NONE +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT 0=Disable InnoDB doublewrite buffer.1=Enable full doublewrite mode (default).2=Enable reduced doublewrite mode. +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 2 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_DOUBLEWRITE_BATCH_SIZE SESSION_VALUE NULL GLOBAL_VALUE 120 diff --git a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test index 1ae10d0f7cf..4827aafccde 100644 --- a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test @@ -37,36 +37,40 @@ SELECT COUNT(@@GLOBAL.innodb_doublewrite); # Check if Value can set # #################################################################### ---error ER_INCORRECT_GLOBAL_LOCAL_VAR -SET @@GLOBAL.innodb_doublewrite=1; ---echo Expected error 'Read only variable' +SET @global_start_value = @@global.innodb_doublewrite; +SELECT @global_start_value; + +SET @@GLOBAL.innodb_doublewrite = 1 ; + SELECT COUNT(@@GLOBAL.innodb_doublewrite); --echo 1 Expected +SET @@GLOBAL.innodb_doublewrite = 2; +SELECT @@GLOBAL.innodb_doublewrite; + +SET @@GLOBAL.innodb_doublewrite = 0; +SELECT @@GLOBAL.innodb_doublewrite; +--echo 2 Expected --echo '#---------------------BS_STVARS_026_03----------------------#' ################################################################# # Check if the value in GLOBAL Table matches value in variable # ################################################################# ---disable_warnings -SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE +SELECT @@GLOBAL.innodb_doublewrite = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_doublewrite'; ---enable_warnings --echo 1 Expected SELECT 
COUNT(@@GLOBAL.innodb_doublewrite); --echo 1 Expected ---disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_doublewrite'; ---enable_warnings --echo 1 Expected @@ -92,15 +96,8 @@ SELECT COUNT(@@innodb_doublewrite); SELECT COUNT(@@local.innodb_doublewrite); --echo Expected error 'Variable is a GLOBAL variable' ---Error ER_INCORRECT_GLOBAL_LOCAL_VAR -SELECT COUNT(@@SESSION.innodb_doublewrite); ---echo Expected error 'Variable is a GLOBAL variable' - SELECT COUNT(@@GLOBAL.innodb_doublewrite); --echo 1 Expected ---Error ER_BAD_FIELD_ERROR -SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite; ---echo Expected error 'Readonly variable' - - +SET @@global.innodb_doublewrite = @global_start_value; +SELECT @@global.innodb_doublewrite; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 61006c8d89d..706011eebfb 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -5994,6 +5994,18 @@ database_corrupted: " You can use CHECK TABLE to scan" " your table for corruption. " << FORCE_RECOVERY_MSG; + + /* Remove the page that is corrupted when + recovering. 
*/ + if (recv_recovery_on) { + ib::info() << "Removing the corrupted page " + << bpage->id << " in tablepace " + << space->name << " from recovered pages."; + mutex_enter(&recv_sys->mutex); + ut_ad(recv_sys->n_addrs > 0); + recv_sys->n_addrs--; + mutex_exit(&recv_sys->mutex); + } } if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 17b2229f1da..61151e4bf4b 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -152,14 +152,28 @@ buf_dblwr_init( ut_zalloc_nokey(buf_size * sizeof(bool))); buf_dblwr->write_buf_unaligned = static_cast<byte*>( - ut_malloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE)); + ut_zalloc_nokey((1 + buf_size) * UNIV_PAGE_SIZE)); buf_dblwr->write_buf = static_cast<byte*>( ut_align(buf_dblwr->write_buf_unaligned, UNIV_PAGE_SIZE)); + buf_dblwr->header_unaligned = static_cast<byte*>( + ut_zalloc_nokey(2 * UNIV_PAGE_SIZE)); + + buf_dblwr->header = static_cast<byte*>( + ut_align(buf_dblwr->header_unaligned, + UNIV_PAGE_SIZE)); + buf_dblwr->buf_block_arr = static_cast<buf_page_t**>( ut_zalloc_nokey(buf_size * sizeof(void*))); + + /* Write the page number and the page type to the doublewrite + * header in case it gets used. */ + mach_write_to_4(buf_dblwr->header + FIL_PAGE_OFFSET, + buf_dblwr->block1); + mach_write_to_2(buf_dblwr->header + FIL_PAGE_TYPE, + FIL_PAGE_TYPE_DBLWR_HEADER); } /** Create the doublewrite buffer if the doublewrite buffer header @@ -342,6 +356,73 @@ too_small: goto start_again; } +/***************************************************************//** +Zeroes out the pages in the doublewrite buffer on disk, and flushes them. +This function must be called before the first double-write batch flush after +the doublewrite mode is changed by the user. +This function is only called from buf_dblwr_flush_buffered_writes(), while +it is holding buf_dblwr->mutex, so this function need not be thread-safe. 
*/ +static +void +buf_dblwr_reset( + ulint doublewrite_mode) { + ulint i = 0; + void* page_unaligned = ut_zalloc_nokey( + (TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + 1) * UNIV_PAGE_SIZE); + byte* page = page_align((byte*)page_unaligned + UNIV_PAGE_SIZE); + + ut_a(doublewrite_mode); + + buf_dblwr_being_created = TRUE; + + /* Reset the first half of doublewrite buffer on disk. + * We handle the first block separately because it determines whether + * the last flush was done in the reduced-doublewrite mode. + */ + if (doublewrite_mode == 2) { + /* Write an empty header page */ + i = 1; + memcpy(page, buf_dblwr->header, FIL_PAGE_DATA); + buf_flush_init_for_writing(NULL, page, NULL, 0); + ut_ad(!buf_page_is_corrupted(FALSE, page, univ_page_size, NULL)); + page += UNIV_PAGE_SIZE; + } + + for (; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ++i) { + mach_write_to_4(page + FIL_PAGE_OFFSET, buf_dblwr->block1 + i); + buf_flush_init_for_writing(NULL, page, NULL, 0); + page += UNIV_PAGE_SIZE; + } + + IORequest write_request(IORequest::WRITE); + const page_id_t page_id(TRX_SYS_SPACE, buf_dblwr->block1); + + page = page_align((byte*)page_unaligned + UNIV_PAGE_SIZE); + + fil_io(write_request, true, page_id, univ_page_size, + 0, + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, + (void*) page, NULL); + + /* Reset the second half of doublewrite buffer on disk. */ + + for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ++i) { + mach_write_to_4(page + FIL_PAGE_OFFSET, buf_dblwr->block2 + i); + buf_flush_init_for_writing(NULL, page, NULL, 0); + page += UNIV_PAGE_SIZE; + } + + page = page_align((byte*)page_unaligned + UNIV_PAGE_SIZE); + + const page_id_t page_id2(TRX_SYS_SPACE, buf_dblwr->block2); + + fil_io(write_request, true, page_id2, univ_page_size, 0, + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, + (void*) page, NULL); + + ut_free(page_unaligned); +} + /** At database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. 
If we are @@ -354,7 +435,8 @@ recovery, this function loads the pages from double write buffer into memory. dberr_t buf_dblwr_init_or_load_pages( pfs_os_file_t file, - const char* path) + const char* path, + bool load_corrupt_pages) { byte* buf; byte* page; @@ -366,6 +448,7 @@ buf_dblwr_init_or_load_pages( byte* unaligned_read_buf; ibool reset_space_ids = FALSE; recv_dblwr_t& recv_dblwr = recv_sys->dblwr; + bool header_found = false; /* We do the file i/o past the buffer pool */ @@ -469,9 +552,57 @@ buf_dblwr_init_or_load_pages( page = buf; - for (ulint i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { + /* First check if the first page is of type FIL_PAGE_TYPE_DBLWR_HEADER. + * If so, this means that the last time the doublewrite was used in + * reduced doublewrite mode (innodb_doublewrite=2). + */ + if (fil_page_get_type(page) == FIL_PAGE_TYPE_DBLWR_HEADER) { + header_found = TRUE; + } + + if (header_found && load_corrupt_pages) { + byte* ptr = page + FIL_PAGE_DATA; + ulint num_pages; + const page_size_t& page_size = page_size_t(BUF_DBLWR_HEADER_SIZE, BUF_DBLWR_HEADER_SIZE, true); + + ut_a(!reset_space_ids); + + if (buf_page_is_corrupted( + FALSE, page, page_size, NULL)) { + ib::error() + << "InnoDB: The first block of the doublewrite " + << "buffer is corrupt."; + buf_page_print( + page, + page_size, + BUF_PAGE_PRINT_NO_CRASH); + ut_error; + } + + num_pages = mach_read_from_2(ptr); + ptr += 2; + + for (ulint i = 0; i < num_pages; ++i) { + ulint space_id = mach_read_from_4(ptr); + ptr += 4; + ulint page_no = mach_read_from_4(ptr); + ptr += 4; + recv_dblwr.add(NULL, space_id, page_no); + } + } + + if (header_found) { + page += univ_page_size.physical(); + } + + /* We go through all of the pages in the doublewrite buffer even if + * we found the header page. + */ + for (ulint i = (header_found ? 
1 : 0); + i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { if (reset_space_ids) { ulint source_page_no; + ut_a(!header_found); space_id = 0; mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, @@ -493,6 +624,7 @@ buf_dblwr_init_or_load_pages( write_request, path, file, page, source_page_no * UNIV_PAGE_SIZE, UNIV_PAGE_SIZE); + if (err != DB_SUCCESS) { ib::error() @@ -504,10 +636,15 @@ buf_dblwr_init_or_load_pages( return(err); } - } else if (memcmp(field_ref_zero, page + FIL_PAGE_LSN, 8)) { + } else if (load_corrupt_pages + && memcmp(field_ref_zero, page + FIL_PAGE_LSN, 8)) { /* Each valid page header must contain a nonzero FIL_PAGE_LSN field. */ - recv_dblwr.add(page); + ulint space_id = + mach_read_from_4(page + FIL_PAGE_SPACE_ID); + ulint page_no = + mach_read_from_4(page + FIL_PAGE_OFFSET); + recv_dblwr.add(page, space_id, page_no); } page += univ_page_size.physical(); @@ -529,7 +666,7 @@ buf_dblwr_process() ulint page_no_dblwr = 0; byte* read_buf; byte* unaligned_read_buf; - recv_dblwr_t& recv_dblwr = recv_sys->dblwr; + std::list<recv_dblwr_item_t, ut_allocator<recv_dblwr_item_t> >& dblwr_pages = recv_sys->dblwr.pages; if (!buf_dblwr) { return; @@ -541,12 +678,11 @@ buf_dblwr_process() read_buf = static_cast<byte*>( ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); - for (recv_dblwr_t::list::iterator i = recv_dblwr.pages.begin(); - i != recv_dblwr.pages.end(); - ++i, ++page_no_dblwr) { - byte* page = *i; - ulint space_id = page_get_space_id(page); - fil_space_t* space = fil_space_get(space_id); + for (std::list<recv_dblwr_item_t>::iterator i = dblwr_pages.begin(); + i != dblwr_pages.end(); ++i, ++page_no_dblwr ) { + byte* page = const_cast<byte*>(i->page); + fil_space_t* space = fil_space_get(i->space_id); + ulint space_id = i->space_id; if (space == NULL) { /* Maybe we have dropped the tablespace @@ -556,7 +692,7 @@ buf_dblwr_process() fil_space_open_if_needed(space); - const ulint page_no = page_get_page_no(page); + const ulint page_no = i->page_no; const 
page_id_t page_id(space_id, page_no); if (page_no >= space->size) { @@ -575,7 +711,6 @@ buf_dblwr_process() } const page_size_t page_size(space->flags); - ut_ad(!buf_page_is_zeroes(page, page_size)); /* We want to ensure that for partial reads the unread portion of the page is NUL. */ @@ -632,6 +767,21 @@ buf_dblwr_process() << " from the doublewrite buffer."; } + if (!page) { + /* Theoretically we could have another good + copy for this page in the doublewrite + buffer. If not, we will report a fatal error + for a corrupted page somewhere else if that + page was truly needed. */ + + ib::warn() << "Cannot recover page " << page_id + << " from the doublewrite buffer" + << " because it was written in reduced-doublewrite mode"; + continue; + } + + ut_ad(!buf_page_is_zeroes(page, page_size)); + /* Next, validate the doublewrite page. */ if (fil_page_is_compressed_encrypted(page) || fil_page_is_compressed(page)) { @@ -684,7 +834,7 @@ buf_dblwr_process() << " from the doublewrite buffer."; } - recv_dblwr.pages.clear(); + dblwr_pages.clear(); fil_flush_file_spaces(FIL_TYPE_TABLESPACE); ut_free(unaligned_read_buf); @@ -704,6 +854,9 @@ buf_dblwr_free() os_event_destroy(buf_dblwr->s_event); ut_free(buf_dblwr->write_buf_unaligned); buf_dblwr->write_buf_unaligned = NULL; + ut_free(buf_dblwr->header_unaligned); + buf_dblwr->header_unaligned = NULL; + buf_dblwr->header = NULL; ut_free(buf_dblwr->buf_block_arr); buf_dblwr->buf_block_arr = NULL; @@ -953,8 +1106,10 @@ buf_dblwr_flush_buffered_writes() byte* write_buf; ulint first_free; ulint len; + byte* header_ptr; + ulong use_doublewrite_buf = srv_use_doublewrite_buf; - if (!srv_use_doublewrite_buf || buf_dblwr == NULL) { + if (!use_doublewrite_buf || buf_dblwr == NULL) { /* Sync the writes to the disk. */ buf_dblwr_sync_datafiles(); return; @@ -999,6 +1154,31 @@ try_again: start another batch of flushing. 
*/ buf_dblwr->batch_running = true; first_free = buf_dblwr->first_free; + /* Reset the doublewrite buffer if srv_doublewrite_reset is set. + * This protects against the following scenario: + * 1- server starts with full(=1) doublewrite mode and writes a bunch + * of pages to the doublewrite buffer. + * 2- user changes doublewrite mode from full(=1) to reduced(=2). + * 3- server runs for a long time in the reduced doublewrite mode so + * that the copies that were written to the doublewrite buffer in step + * 1 become stale. + * 4- some of the non-doublewrite pages on disk whose copies in the + * doublewrite buffer became stale get corrupted because of a hardware + * or a software failure. + * 5- server crashes. During recovery InnoDB processes pages both + * in the doublewrite header and the following full pages. + * 6- The stale copies in the doublewrite buffer are used to restore + * corrupt non-doublewrite pages on disk. Now the stale data will be + * served when these pages are accessed. + * This is a rare case because it needs the corruption to happen to one + * of the pages written to the doublewrite buffer in full mode. We + * nevertheless protect against this case by resetting the doublewrite + * buffer on disk, when the doublewrite mode changes. + */ + if (srv_doublewrite_reset) { + buf_dblwr_reset(use_doublewrite_buf); + srv_doublewrite_reset = FALSE; + } /* Now safe to release the mutex. 
Note that though no other thread is allowed to post to the doublewrite batch flushing @@ -1007,14 +1187,23 @@ try_again: mutex_exit(&buf_dblwr->mutex); write_buf = buf_dblwr->write_buf; + header_ptr = buf_dblwr->header + FIL_PAGE_DATA; + memset(header_ptr, 0, BUF_DBLWR_HEADER_SIZE - FIL_PAGE_DATA); + mach_write_to_2(header_ptr, buf_dblwr->first_free); + header_ptr += 2; for (ulint len2 = 0, i = 0; i < buf_dblwr->first_free; len2 += UNIV_PAGE_SIZE, i++) { - const buf_block_t* block; - + const buf_block_t* block; block = (buf_block_t*) buf_dblwr->buf_block_arr[i]; + const buf_page_t* page = &block->page; + + mach_write_to_4(header_ptr, page->id.space()); + header_ptr += 4; + mach_write_to_4(header_ptr, page->id.page_no()); + header_ptr += 4; if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE || block->page.zip.data) { @@ -1033,14 +1222,30 @@ try_again: } /* Write out the first block of the doublewrite buffer */ - len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE, - buf_dblwr->first_free) * UNIV_PAGE_SIZE; + if (use_doublewrite_buf == 2) { + ib_uint32_t checksum = page_zip_calc_checksum( + buf_dblwr->header, BUF_DBLWR_HEADER_SIZE, + static_cast<srv_checksum_algorithm_t>( + srv_checksum_algorithm)); + + const page_size_t& page_size = page_size_t(BUF_DBLWR_HEADER_SIZE, BUF_DBLWR_HEADER_SIZE, true); + mach_write_to_4(buf_dblwr->header + FIL_PAGE_SPACE_OR_CHKSUM, + checksum); + len = BUF_DBLWR_HEADER_SIZE; + write_buf = buf_dblwr->header; + ut_ad(!buf_page_is_corrupted(FALSE, write_buf, page_size, NULL)); + } else { + len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE, + buf_dblwr->first_free) * UNIV_PAGE_SIZE; + } fil_io(IORequestWrite, true, page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size, 0, len, (void*) write_buf, NULL); - if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { + + if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + || use_doublewrite_buf == 2) { /* No unwritten pages in the second block. 
*/ goto flush; } @@ -1058,7 +1263,12 @@ try_again: flush: /* increment the doublewrite flushed pages counter */ - srv_stats.dblwr_pages_written.add(buf_dblwr->first_free); + if (use_doublewrite_buf == 1) { + srv_stats.dblwr_pages_written.add(buf_dblwr->first_free); + } else { + srv_stats.dblwr_pages_written.inc(); + } + srv_stats.dblwr_writes.inc(); /* Now flush the doublewrite buffer data to disk */ diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index e4bb11c9a22..aca84b9cfd4 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -571,7 +571,7 @@ SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn) ut_a(it->order() == 0); - buf_dblwr_init_or_load_pages(it->handle(), it->filepath()); + buf_dblwr_init_or_load_pages(it->handle(), it->filepath(), true); /* Check the contents of the first page of the first datafile. */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 68b69e94681..fc42d016698 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -254,7 +254,6 @@ values */ static my_bool innobase_file_format_check; static my_bool innobase_use_atomic_writes; static my_bool innobase_use_fallocate; -static my_bool innobase_use_doublewrite; static my_bool innobase_use_checksums; static my_bool innobase_locks_unsafe_for_binlog; static my_bool innobase_rollback_on_timeout; @@ -4279,8 +4278,6 @@ innobase_change_buffering_inited_ok: srv_n_read_io_threads = (ulint) innobase_read_io_threads; srv_n_write_io_threads = (ulint) innobase_write_io_threads; - srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; - if (!innobase_use_checksums) { ib::warn() << "Setting innodb_checksums to OFF is DEPRECATED." " This option may be removed in future releases. 
You" @@ -18086,6 +18083,38 @@ innodb_io_capacity_max_update( } /****************************************************************//** +Update the system variable innodb_doublewrite using the "saved" +value. This function is registered as a callback with MySQL. */ +static +void +innodb_doublewrite_update( +/*======================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + ulong in_val = *static_cast<const ulong*>(save); + if (!in_val || !srv_use_doublewrite_buf) { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "innodb_doublewrite can not be " + "dynamically changed to or from 0. " + "Do a clean shutdown if you want to " + "change it from or to 0."); + } else { + ut_a(in_val == 1 || in_val == 2); + if (srv_use_doublewrite_buf != in_val) { + srv_use_doublewrite_buf = in_val; + srv_doublewrite_reset = 1; + } + } +} + +/****************************************************************//** Update the system variable innodb_io_capacity using the "saved" value. This function is registered as a callback with MySQL. */ static @@ -20530,11 +20559,12 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, "The common part for InnoDB table spaces.", NULL, NULL, NULL); -static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable InnoDB doublewrite buffer (enabled by default)." - " Disable with --skip-innodb-doublewrite.", - NULL, NULL, TRUE); +static MYSQL_SYSVAR_ULONG(doublewrite, srv_use_doublewrite_buf, + PLUGIN_VAR_OPCMDARG, + "0=Disable InnoDB doublewrite buffer." + "1=Enable full doublewrite mode (default)." 
+ "2=Enable reduced doublewrite mode.", + NULL, innodb_doublewrite_update, 1, 0, 2, 0); static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 8f04f9d15ee..87fa470f897 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -112,6 +112,7 @@ static buf_page_desc_t i_s_page_type[] = { {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF}, {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED}, {"PAGE COMPRESSED AND ENCRYPTED", FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED}, + {"DOUBLEWRITE HEADER", FIL_PAGE_TYPE_DBLWR_HEADER}, }; /** This structure defines information we will fetch from pages diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h index 598609e2be4..f560429feb9 100644 --- a/storage/innobase/include/buf0dblwr.h +++ b/storage/innobase/include/buf0dblwr.h @@ -37,6 +37,9 @@ Created 2011/12/19 Inaam Rana extern buf_dblwr_t* buf_dblwr; /** Set to TRUE when the doublewrite buffer is being created */ extern ibool buf_dblwr_being_created; +/** The size of the doublewrite header page when the reduced-doublewrite mode +is used. */ +#define BUF_DBLWR_HEADER_SIZE 4096 /** Create the doublewrite buffer if the doublewrite buffer header is not present in the TRX_SYS page. @@ -59,7 +62,8 @@ recovery, this function loads the pages from double write buffer into memory. dberr_t buf_dblwr_init_or_load_pages( pfs_os_file_t file, - const char* path); + const char* path, + bool load_corrupt_pages); /** Process and remove the double write buffer pages for all tablespaces. 
*/ void @@ -157,6 +161,11 @@ struct buf_dblwr_t{ buf_page_t** buf_block_arr;/*!< array to store pointers to the buffer blocks which have been cached to write_buf */ + byte* header;/*!< write buffer used for writing out the + doublewrite header for reduced doublewrite + mode (innodb_doublewrite=2) */ + byte* header_unaligned;/*!< pointer to header, + but unaligned */ }; #endif diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index bf231565657..87f94c0234e 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -392,6 +392,10 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED 37401 /*!< Page is compressed and then encrypted */ #define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */ +/*!< First page of the double write buffer holds the +space ids and the page numbers for the most recently +flushed pages. */ +#define FIL_PAGE_TYPE_DBLWR_HEADER 32124 /*!< Doublewrite header */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_RTREE 17854 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic index 9505cc0bd69..d3f26334f3b 100644 --- a/storage/innobase/include/fil0fil.ic +++ b/storage/innobase/include/fil0fil.ic @@ -35,6 +35,8 @@ fil_get_page_type_name( ulint page_type) /*!< in: FIL_PAGE_TYPE */ { switch(page_type) { + case FIL_PAGE_TYPE_DBLWR_HEADER: + return "PAGE_TYPE_DBLWR_HEADER"; case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED: return "PAGE_COMPRESSED_ENRYPTED"; case FIL_PAGE_PAGE_COMPRESSED: @@ -88,6 +90,7 @@ fil_page_type_validate( /* Validate page type */ if (!((page_type == FIL_PAGE_PAGE_COMPRESSED || page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED || + page_type == FIL_PAGE_TYPE_DBLWR_HEADER || page_type == FIL_PAGE_INDEX || page_type == FIL_PAGE_RTREE || page_type == FIL_PAGE_UNDO_LOG || diff --git a/storage/innobase/include/log0recv.h 
b/storage/innobase/include/log0recv.h index 24ad9ae2a30..29b268fcff7 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -173,10 +173,20 @@ struct recv_addr_t{ hash_node_t addr_hash;/*!< hash node in the hash bucket chain */ }; +struct recv_dblwr_item_t { + const byte* page; + ulint space_id; + ulint page_no; +}; + struct recv_dblwr_t { /** Add a page frame to the doublewrite recovery buffer. */ - void add(byte* page) { - pages.push_back(page); + void add(const byte* page, ulint space_id, ulint page_no) { + recv_dblwr_item_t item; + item.page = page; + item.space_id = space_id; + item.page_no = page_no; + pages.push_back(item); } /** Find a doublewrite copy of a page. @@ -186,10 +196,11 @@ struct recv_dblwr_t { @retval NULL if no page was found */ const byte* find_page(ulint space_id, ulint page_no); - typedef std::list<byte*, ut_allocator<byte*> > list; + std::list<recv_dblwr_item_t, ut_allocator<recv_dblwr_item_t> > pages; - /** Recovered doublewrite buffer page frames */ - list pages; + void operator() () { + pages.clear(); + } }; /** Recovery system data structure */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 891f25f68f1..f88669bdd7d 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -486,7 +486,8 @@ extern my_bool srv_stats_include_delete_marked; extern unsigned long long srv_stats_modified_counter; extern my_bool srv_stats_sample_traditional; -extern ibool srv_use_doublewrite_buf; +extern ulong srv_use_doublewrite_buf; +extern my_bool srv_doublewrite_reset; extern ulong srv_doublewrite_batch_size; extern ulong srv_checksum_algorithm; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index dc8977e49c8..814d4e4804a 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -3500,14 +3500,14 @@ recv_dblwr_t::find_page(ulint space_id, ulint page_no) { typedef 
std::vector<const byte*, ut_allocator<const byte*> > matches_t; - matches_t matches; const byte* result = 0; - for (list::iterator i = pages.begin(); i != pages.end(); ++i) { - if (page_get_space_id(*i) == space_id - && page_get_page_no(*i) == page_no) { - matches.push_back(*i); + for (std::list<recv_dblwr_item_t>::iterator i = pages.begin(); i != pages.end(); ++i) { + if (i->page + && (page_get_space_id(i->page) == space_id) + && (page_get_page_no(i->page) == page_no)) { + matches.push_back(i->page); } } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 2894be6b12c..3999f7bc90a 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -392,8 +392,8 @@ unsigned long long srv_stats_modified_counter; based on number of configured pages */ my_bool srv_stats_sample_traditional; -/** copy of innodb_doublewrite */ -ibool srv_use_doublewrite_buf; +ulong srv_use_doublewrite_buf = 1; +my_bool srv_doublewrite_reset = FALSE; /** innodb_doublewrite_batch_size (a debug parameter) specifies the number of pages to use in LRU and flush_list batch flushing. |