diff options
author | Kristian Nielsen <knielsen@knielsen-hq.org> | 2014-11-25 12:19:48 +0100 |
---|---|---|
committer | Kristian Nielsen <knielsen@knielsen-hq.org> | 2014-11-25 12:19:48 +0100 |
commit | b79685902d95df2c98544e8171b92cb09417328e (patch) | |
tree | d9f8a4be3243c0db761a512a3cc78932e4c41fc7 /mysql-test/suite/rpl | |
parent | f3bdf9d7415bb498a32b42ef9ca10f9ac48a15fe (diff) | |
download | mariadb-git-b79685902d95df2c98544e8171b92cb09417328e.tar.gz |
MDEV-6903: gtid_slave_pos is incorrect after master crash
When a master server restarts, it logs a special restart format description
event in its binlog. When the slave sees this event, it knows it needs to roll
back any active partial transaction, in case the master crashed previously in
the middle of writing such transaction to its binlog.
However, there was a bug where this rollback did not reset rgi->pending_gtid.
This caused the @@gtid_slave_pos to be updated incorrectly with the GTID of
the partial transaction that was rolled back.
Fix this by always clearing rgi->pending_gtid in cleanup_context(), hopefully
preventing similar bugs from turning up in other special cases where a
transaction is rolled back during replication.
Thanks to Pavel Ivanov for tracking down the issue and providing a test case.
Diffstat (limited to 'mysql-test/suite/rpl')
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_gtid_crash.result | 61 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_gtid_crash.test | 100 |
2 files changed, 161 insertions, 0 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_gtid_crash.result b/mysql-test/suite/rpl/r/rpl_gtid_crash.result index 3417ad561f4..bbe1dfc6c5f 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_crash.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_crash.result @@ -133,9 +133,17 @@ SELECT @@GLOBAL.server_id; 3 SELECT * from t1 WHERE a > 10 ORDER BY a; a +gtid_check +Binlog pos ok # Wait 30 seconds for SQL thread to catch up with IO thread SELECT * from t1 WHERE a > 10 ORDER BY a; a +gtid_check +Binlog pos ok +gtid_check +Slave pos ok +gtid_check +Current pos ok # Repeat this with additional transactions on the master SET GLOBAL debug_dbug="+d,inject_error_writing_xid"; BEGIN; @@ -175,11 +183,21 @@ SELECT * from t1 WHERE a > 10 ORDER BY a; a 13 14 +gtid_check +Binlog pos ok +gtid_check +Current pos ok # Wait 30 seconds for SQL thread to catch up with IO thread SELECT * from t1 WHERE a > 10 ORDER BY a; a 13 14 +gtid_check +Binlog pos ok +gtid_check +Slave pos ok +gtid_check +Current pos ok # Repeat this with additional transactions on the master SET GLOBAL debug_dbug="+d,inject_error_writing_xid"; BEGIN; @@ -205,5 +223,48 @@ a 14 23 24 +# Repeat this with slave restart +SET GLOBAL debug_dbug="+d,inject_error_writing_xid"; +BEGIN; +INSERT INTO t1 VALUES (25); +COMMIT; +ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on device") +SET GLOBAL debug_dbug="+d,crash_dispatch_command_before"; +COMMIT; +Got one of the listed errors +# Wait 30 seconds for IO thread to connect and SQL thread to catch up +# with IO thread. 
+include/stop_slave.inc +gtid_check +Binlog pos ok +gtid_check +Current pos ok +INSERT INTO t1 VALUES (26); +INSERT INTO t1 VALUES (27); +SELECT * from t1 WHERE a > 10 ORDER BY a; +a +13 +14 +23 +24 +26 +27 +include/save_master_gtid.inc +gtid_check +Binlog pos ok +gtid_check +Slave pos ok +gtid_check +Current pos ok +include/start_slave.inc +include/sync_with_master_gtid.inc +SELECT * from t1 WHERE a > 10 ORDER BY a; +a +13 +14 +23 +24 +26 +27 DROP TABLE t1; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_gtid_crash.test b/mysql-test/suite/rpl/t/rpl_gtid_crash.test index 0caad2a12fe..fed9b8256db 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_crash.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_crash.test @@ -269,6 +269,7 @@ SET GLOBAL debug_dbug="+d,crash_before_writing_xid"; --connection server_1 INSERT INTO t1 VALUES (9), (10); +--let $saved_gtid=`SELECT @@last_gtid` --save_master_pos --connection server_2 @@ -333,6 +334,9 @@ EOF SELECT @@GLOBAL.server_id; SELECT * from t1 WHERE a > 10 ORDER BY a; +--disable_query_log +eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +--enable_query_log --echo # Wait 30 seconds for SQL thread to catch up with IO thread --connection server_2 @@ -357,6 +361,11 @@ if ($read_log_pos != $exec_log_pos) } SELECT * from t1 WHERE a > 10 ORDER BY a; +--disable_query_log +eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_slave_pos, '$saved_gtid'), "Slave pos ok", CONCAT("Unexpected slave pos: ", @@gtid_slave_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID 
$saved_gtid.")) AS gtid_check; +--enable_query_log --echo # Repeat this with additional transactions on the master @@ -387,6 +396,7 @@ EOF SELECT @@GLOBAL.server_id; INSERT INTO t1 VALUES (13); INSERT INTO t1 VALUES (14); +--let $saved_gtid=`SELECT @@last_gtid` SELECT * from t1 WHERE a > 10 ORDER BY a; --source include/save_master_gtid.inc @@ -420,6 +430,10 @@ EOF SELECT @@GLOBAL.server_id; SELECT * from t1 WHERE a > 10 ORDER BY a; +--disable_query_log +eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +--enable_query_log --echo # Wait 30 seconds for SQL thread to catch up with IO thread --connection server_2 @@ -444,6 +458,11 @@ if ($read_log_pos != $exec_log_pos) } SELECT * from t1 WHERE a > 10 ORDER BY a; +--disable_query_log +eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_slave_pos, '$saved_gtid'), "Slave pos ok", CONCAT("Unexpected slave pos: ", @@gtid_slave_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +--enable_query_log --echo # Repeat this with additional transactions on the master @@ -472,6 +491,7 @@ EOF INSERT INTO t1 VALUES (23); INSERT INTO t1 VALUES (24); +--let $saved_gtid=`SELECT @@last_gtid` SELECT * from t1 WHERE a > 10 ORDER BY a; --source include/save_master_gtid.inc @@ -479,6 +499,86 @@ SELECT * from t1 WHERE a > 10 ORDER BY a; --source 
include/sync_with_master_gtid.inc SELECT * from t1 WHERE a > 10 ORDER BY a; +--echo # Repeat this with slave restart + +--connection server_1 +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +SET GLOBAL debug_dbug="+d,inject_error_writing_xid"; +BEGIN; +INSERT INTO t1 VALUES (25); +--error ER_ERROR_ON_WRITE +COMMIT; +SET GLOBAL debug_dbug="+d,crash_dispatch_command_before"; +--error 2006,2013 +COMMIT; + +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart +EOF + +--connection server_1 +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection server_2 +--echo # Wait 30 seconds for IO thread to connect and SQL thread to catch up +--echo # with IO thread. +--let $wait_timeout= 300 +while ($wait_timeout != 0) +{ + --let $connected=`SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE State = 'Waiting for master to send event'` + if ($connected) + { + --let $read_log_pos= query_get_value('SHOW SLAVE STATUS', Read_Master_Log_Pos, 1) + --let $exec_log_pos= query_get_value('SHOW SLAVE STATUS', Exec_Master_Log_Pos, 1) + if ($read_log_pos == $exec_log_pos) + { + --let $wait_timeout= 0 + } + if ($read_log_pos != $exec_log_pos) + { + --sleep 0.1 + --dec $wait_timeout + } + } + if (!$connected) + { + --sleep 0.1 + --dec $wait_timeout + } +} +if (`SELECT NOT $connected OR $read_log_pos != $exec_log_pos`) +{ + --die Timeout wait for IO thread to connect and SQL thread to catch up with IO thread +} + +--source include/stop_slave.inc + +--connection server_1 +--disable_query_log +eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +--enable_query_log 
+INSERT INTO t1 VALUES (26); +INSERT INTO t1 VALUES (27); +SELECT * from t1 WHERE a > 10 ORDER BY a; +--source include/save_master_gtid.inc + +--connection server_2 +--disable_query_log +eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_slave_pos, '$saved_gtid'), "Slave pos ok", CONCAT("Unexpected slave pos: ", @@gtid_slave_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check; +--enable_query_log +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SELECT * from t1 WHERE a > 10 ORDER BY a; + --connection server_1 DROP TABLE t1; |