diff options
25 files changed, 2618 insertions, 126 deletions
diff --git a/mysql-test/suite/binlog/r/binlog_truncate_active_log.result b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result new file mode 100644 index 00000000000..3ad9f5c560c --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result @@ -0,0 +1,237 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE t (f INT) ENGINE=INNODB; +CREATE TABLE t2 (f INT) ENGINE=INNODB; +CREATE TABLE tm (f INT) ENGINE=Aria; +# Case A. +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +INSERT INTO t VALUES (20); +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +DELETE FROM t2 WHERE f = 0 /* no such record */; +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-7 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-6/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE t (f INT) ENGINE=INNODB +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (f INT) ENGINE=INNODB +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE tm (f INT) ENGINE=Aria +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000001 # Xid # # COMMIT /* XID */ +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000001 # Query # # COMMIT +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-5 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +# Case B. +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +DELETE FROM t2 WHERE f = 0; +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +INSERT INTO t VALUES (20); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-10 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-10/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; DELETE FROM t +master-bin.000002 # Xid # # COMMIT /* XID */ +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000002 # Xid # # COMMIT /* XID */ +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000002 # Query # # COMMIT +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Query # # use `test`; DELETE FROM t2 WHERE f = 0 +master-bin.000002 # Query # # COMMIT +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-9 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +# Case C. +CREATE PROCEDURE sp_blank_xa() +BEGIN +XA START 'blank'; +DELETE FROM t2 WHERE f = 0 /* no such record */; +XA END 'blank'; +XA PREPARE 'blank'; +END| +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +INSERT INTO t VALUES (20); +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +CALL sp_blank_xa; +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-15 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-14/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000003 # Gtid # # BEGIN GTID #-#-# +master-bin.000003 # Query # # use `test`; DELETE FROM t +master-bin.000003 # Xid # # COMMIT /* XID */ +master-bin.000003 # Gtid # # GTID #-#-# +master-bin.000003 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `sp_blank_xa`() +BEGIN +XA START 'blank'; +DELETE FROM t2 WHERE f = 0 /* no such record */; +XA END 'blank'; +XA PREPARE 'blank'; +END +master-bin.000003 # Gtid # # BEGIN GTID #-#-# +master-bin.000003 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000003 # Xid # # COMMIT /* XID */ +master-bin.000003 # Gtid # # BEGIN GTID #-#-# +master-bin.000003 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000003 # Query # # COMMIT +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-13 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +DROP PROCEDURE sp_blank_xa; +# Case D. +CREATE PROCEDURE sp_xa() +BEGIN +XA START 'xid'; +DELETE FROM t WHERE f = 10; +XA END 'xid'; +XA PREPARE 'xid'; +END| +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection default; +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); +connection master1; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +CALL sp_xa; +connection master2; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +INSERT INTO t2 VALUES (20); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; +Before the crash +0-1-20 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +disconnect master3; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-20/ in mysqld.1.err +Pre-crash binlog file content: +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000004 # Gtid # # BEGIN GTID #-#-# +master-bin.000004 # Query # # use `test`; DELETE FROM t +master-bin.000004 # Xid # # COMMIT /* XID */ +master-bin.000004 # Gtid # # GTID #-#-# +master-bin.000004 # Query # # use `test`; DROP PROCEDURE sp_blank_xa +master-bin.000004 # Gtid # # GTID #-#-# +master-bin.000004 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `sp_xa`() +BEGIN +XA START 'xid'; +DELETE FROM t WHERE f = 10; +XA END 'xid'; +XA PREPARE 'xid'; +END +master-bin.000004 # Gtid # # BEGIN GTID #-#-# +master-bin.000004 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000004 # Xid # # COMMIT /* XID */ +master-bin.000004 # Gtid # # BEGIN GTID #-#-# +master-bin.000004 # Query # # use `test`; INSERT INTO tm VALUES (10) +master-bin.000004 # Query # # COMMIT +master-bin.000004 # Gtid # # XA START X'786964',X'',1 GTID #-#-# +master-bin.000004 # Query # # use `test`; DELETE FROM t WHERE f = 10 +master-bin.000004 # Query # # XA END X'786964',X'',1 +master-bin.000004 # XA_prepare # # XA PREPARE X'786964',X'',1 +SELECT @@global.gtid_binlog_pos as 'After the crash'; +After the crash +0-1-19 +"One row should be present in table 't'" +SELECT * FROM t; +f +10 +DELETE FROM t; +DROP PROCEDURE sp_xa; +# Cleanup +DROP TABLE t,t2,tm; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result new file mode 100644 index 00000000000..b8fd04497f2 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result @@ -0,0 +1,189 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; +# +# +# Case "A" : "neither engine committed => rollback & binlog truncate" +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal"; +COMMIT; +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +List of binary logs after rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# restart the server with --rpl-semi-sync-slave-enabled=1 +# the server is restarted +# restart: --rpl-semi-sync-slave-enabled=1 +connection default; +# +# *** Summary: 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +1 +SELECT COUNT(*) FROM t2; +COUNT(*) +1 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-2 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-2 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +Proof of the truncated binlog file is readable (two transactions must be seen): +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/; +/*!40019 SET @@session.max_insert_delayed_threads=0*/; +/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/; +DELIMITER /*!*/; +START TRANSACTION +/*!*/; +COMMIT/*!*/; +START TRANSACTION +/*!*/; +COMMIT/*!*/; +DELIMITER ; +# End of log file +ROLLBACK /* added by mysqlbinlog */; +/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/; +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/; +# +# +# Case "B" : "one engine has committed its transaction branch" +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +SET GLOBAL debug_dbug="d,enable_log_write_upto_crash"; +BEGIN; +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +COMMIT; +connection default; +# restart: --rpl-semi-sync-slave-enabled=1 +connection default; +# +# *** Summary: 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +2 +SELECT COUNT(*) FROM t2; +COUNT(*) +2 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-3 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-3 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +master-bin.000004 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +# +# +# Case "C" : "both engines have committed its transaction branch" +# +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; +connect con1,localhost,root,,; +List of binary logs before rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +BEGIN; +INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +SET DEBUG_SYNC= "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal"; +COMMIT; +connection default; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +List of binary logs after rotation +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# restart the server with --rpl-semi-sync-slave-enabled=1 +# the server is restarted +# restart: --rpl-semi-sync-slave-enabled=1 +connection default; +# +# *** Summary: 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4: +# +SELECT COUNT(*) FROM t1; +COUNT(*) +2 +SELECT COUNT(*) FROM t2; +COUNT(*) +2 +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-3 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-3 +List of binary logs at the end of the tests +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +master-bin.000004 # +# *** +DELETE FROM t1; +DELETE FROM t2; +disconnect con1; +# +DROP TABLE t1, t2; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result new file mode 100644 index 00000000000..e11a40feb2f --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result @@ -0,0 +1,53 @@ +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +SET @@global.max_binlog_size= 4096; +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM; +connect master1,localhost,root,,; +"List of binary logs before rotation" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +INSERT INTO ti VALUES(1,"I am gonna survive"); +INSERT INTO tm VALUES(1,"me too!"); +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go"; +INSERT INTO ti VALUES (2, REPEAT("x", 4100)); +connect master2,localhost,root,,; +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +INSERT INTO ti VALUES (3, "not gonna survive"); +connection default; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +"List of binary logs before crash" +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +# The gtid binlog state prior the crash will be truncated at the end of the test +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-6 +connection default; +# Kill the server +disconnect master1; +disconnect master2; +# restart: --rpl-semi-sync-slave-enabled=1 +FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err +"One record should be present in table" +SELECT * FROM ti; +a b +1 I am gonna survive +# The truncated gtid binlog state +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-4 +SELECT @@global.gtid_binlog_pos; +@@global.gtid_binlog_pos +0-1-4 +# Cleanup +DROP TABLE ti; +# End of the tests diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result new file mode 100644 index 00000000000..0ee9a7c871d --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result @@ -0,0 +1,58 @@ +SET @@global.max_binlog_size= 4096; +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("Cannot truncate the binary log to file"); +call mtr.add_suppression("Crash recovery failed"); +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("Found 1 prepared transactions"); +call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed"); +call mtr.add_suppression("Checking table.*tm"); +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; +INSERT INTO tm VALUES(1); +connect master1,localhost,root,,; +connect master2,localhost,root,,; +connect master3,localhost,root,,; +connection master1; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go"; +INSERT INTO ti VALUES (5 - 1, REPEAT("x", 4100)); +connection master2; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +INSERT INTO ti VALUES (5, REPEAT("x", 1)); +connection master3; +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready"; +INSERT INTO tm VALUES (2); +connection default; +SET DEBUG_SYNC= "now WAIT_FOR master3_ready"; +# The gtid binlog state prior the crash must be restored at the end of the test; +SELECT @@global.gtid_binlog_state; +@@global.gtid_binlog_state +0-1-9 +# Kill the server +# Failed restart as the semisync slave +# Normal restart +# restart +FOUND 1 /Cannot truncate the binary log to file/ in mysqld.1.err +# Proof that the in-doubt transactions are recovered by the 2nd normal server restart +SELECT COUNT(*) = 5 as 'True' FROM ti; +True +1 +SELECT COUNT(*) <= 1 FROM tm; +COUNT(*) <= 1 +1 +# The gtid binlog state prior the crash is restored now +SELECT @@GLOBAL.gtid_binlog_state; +@@GLOBAL.gtid_binlog_state +0-1-9 +SELECT @@GLOBAL.gtid_binlog_pos; +@@GLOBAL.gtid_binlog_pos +0-1-9 +# Cleanup +DROP TABLE ti, tm; +End of test diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc new file mode 100644 index 00000000000..0bc83477d91 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc @@ -0,0 +1,57 @@ +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); +connect(master3,localhost,root,,); + +--connection default + +# First to commit few transactions +INSERT INTO t VALUES (10); +INSERT INTO tm VALUES (10); + +--connection master1 +# Hold insert after write to binlog and before "run_commit_ordered" in engine +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives"; +--send_eval $query1 + +--connection master2 +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready"; +--send_eval $query2 + +--connection master3 +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SELECT @@global.gtid_binlog_pos as 'Before the crash'; + +--connection default +--source include/kill_mysqld.inc +--disconnect master1 +--disconnect master2 +--disconnect master3 + +# +# Server restart +# +--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 +--source include/start_mysqld.inc + +# Check error log for a successful truncate message. +--let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err + +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=Successfully truncated.*to remove transactions starting from GTID $truncate_gtid_pos + +--source include/search_pattern_in_file.inc + +--echo Pre-crash binlog file content: +--let $binlog_file= query_get_value(show binary logs, Log_name, $binlog_file_index) +--source include/show_binlog_events.inc + +SELECT @@global.gtid_binlog_pos as 'After the crash'; +--echo "One row should be present in table 't'" +SELECT * FROM t; + +# prepare binlog file index for the next test +--inc $binlog_file_index + +# Local cleanup +DELETE FROM t; diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.test b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test new file mode 100644 index 00000000000..dbba8697b86 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test @@ -0,0 +1,102 @@ +# ==== Purpose ==== +# +# Test verifies the truncation of single binary log file. +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_innodb.inc +--source include/have_aria.inc +# File: binlog_truncate_active_log.inc included in test makes use of +# 'debug_sync' facility. +--source include/have_debug_sync.inc +--source include/have_binlog_format_statement.inc + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); + +# The following cases are tested: +# A. 2pc transaction is followed by a blank "zero-engines" one +# B. 2pc transaction follows the blank one +# C. Similarly to A, with the XA blank transaction + +RESET MASTER; +CREATE TABLE t (f INT) ENGINE=INNODB; +CREATE TABLE t2 (f INT) ENGINE=INNODB; +CREATE TABLE tm (f INT) ENGINE=Aria; + +# Old (pre-crash) binlog file index initial value. +# It keeps incremented at the end of each case. +--let $binlog_file_index=1 + +--echo # Case A. +# Using 'debug_sync' hold 'query1' execution after 'query1' is flushed and +# synced to binary log but not yet committed. In an another connection hold +# 'query2' execution after 'query2' is flushed and synced to binlog. +# Crash and restart server with --rpl-semi-sync-slave-enabled=1 +# +# During recovery of binary log 'query1' status is checked with InnoDB engine, +# it will be in prepared but not yet commited. All transactions starting from +# 'query1' onwards will be removed from the binary log. +# Show-binlog-events is to prove that. + +--let $truncate_gtid_pos = 0-1-6 +--let $query1 = INSERT INTO t VALUES (20) +--let $query2 = DELETE FROM t2 WHERE f = 0 /* no such record */ +--source binlog_truncate_active_log.inc + +--echo # Case B. +# The inverted sequence ends up to truncate starting from $query2 +--let $truncate_gtid_pos = 0-1-10 +--let $query1 = DELETE FROM t2 WHERE f = 0 +--let $query2 = INSERT INTO t VALUES (20) +--source binlog_truncate_active_log.inc + + +--echo # Case C. +delimiter |; +CREATE PROCEDURE sp_blank_xa() +BEGIN + XA START 'blank'; + DELETE FROM t2 WHERE f = 0 /* no such record */; + XA END 'blank'; + XA PREPARE 'blank'; +END| +delimiter ;| + +# The same as in A with $query2 being the zero-engine XA transaction. +# Both $query1 and $query2 are going to be truncated. +--let $truncate_gtid_pos = 0-1-14 +--let $query1 = INSERT INTO t VALUES (20) +--let $query2 = CALL sp_blank_xa +--source binlog_truncate_active_log.inc + +DROP PROCEDURE sp_blank_xa; + + +--echo # Case D. +delimiter |; +CREATE PROCEDURE sp_xa() +BEGIN + XA START 'xid'; + DELETE FROM t WHERE f = 10; + XA END 'xid'; + XA PREPARE 'xid'; +END| +delimiter ;| + +# The same as in B with $query1 being the prepared XA transaction. +# Truncation must occurs at $query2. +--let $truncate_gtid_pos = 0-1-20 +--let $query1 = CALL sp_xa +--let $query2 = INSERT INTO t2 VALUES (20) +--source binlog_truncate_active_log.inc + +DROP PROCEDURE sp_xa; + + +--echo # Cleanup +DROP TABLE t,t2,tm; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc new file mode 100644 index 00000000000..52ce4741eaa --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc @@ -0,0 +1,73 @@ +# +# Invoked by binlog_truncate_multi_engine.test +# Parameters: +# $debug_sync_action describes debug-sync actions +# $kill_server 1 when to crash, 0 for regular restart +# $restart_parameters the caller may simulate partial commit at recovery +# $test_outcome summary of extected results +# $MYSQLD_DATADIR + +--echo # +--echo # +--echo # Case $case : $description +--echo # +RESET MASTER; +FLUSH LOGS; +SET GLOBAL max_binlog_size= 4096; + +connect(con1,localhost,root,,); +--echo List of binary logs before rotation +--source include/show_binary_logs.inc +INSERT INTO t1 VALUES (1, REPEAT("x", 1)); +INSERT INTO t2 VALUES (1, REPEAT("x", 1)); +if (`SELECT $case = "B"`) +{ + --write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait-binlog_truncate_multi_engine.test +EOF + + SET GLOBAL debug_dbug="d,enable_log_write_upto_crash"; +} +BEGIN; + INSERT INTO t2 VALUES (2, REPEAT("x", 4100)); + INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); + +if (`SELECT $debug_sync_action != ""`) +{ + --eval SET DEBUG_SYNC= $debug_sync_action +} +send COMMIT; + +--connection default +if (`SELECT $case = "B"`) +{ + --source include/wait_until_disconnected.inc + --source include/start_mysqld.inc +} +if (`SELECT $case != "B"`) +{ + SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; + --echo List of binary logs after rotation + --source include/show_binary_logs.inc + + --echo # restart the server with $restart_parameters + --echo # the server is restarted + --source include/restart_mysqld.inc +} + +--connection default +--echo # +--echo # *** Summary: $test_outcome: +--echo # +SELECT COUNT(*) FROM t1; +SELECT COUNT(*) FROM t2; +SELECT @@GLOBAL.gtid_binlog_state; +SELECT @@GLOBAL.gtid_binlog_pos; +--echo List of binary logs at the end of the tests +--source include/show_binary_logs.inc +--echo # *** +# cleanup +DELETE FROM t1; +DELETE FROM t2; +--disconnect con1 +--echo # diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt new file mode 100644 index 00000000000..03e7d74f6db --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt @@ -0,0 +1 @@ +--plugin-load=$HA_ROCKSDB_SO diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test new file mode 100644 index 00000000000..f8e32f16f0f --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test @@ -0,0 +1,60 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs with multiple transactional +# storage engines +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_rocksdb.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +--let $old_max_binlog_size= `select @@global.max_binlog_size` +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +--let $MYSQLD_DATADIR= `SELECT @@datadir` + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb; + +--let $case = "A" +--let $description = "neither engine committed => rollback & binlog truncate" +# Hold off engine commits after write to binlog and its rotation. +# The transaction is killed along with the server after that. +--let $shutdown_timeout=0 +--let $debug_sync_action = "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal" +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3 +--source binlog_truncate_multi_engine.inc +--echo Proof of the truncated binlog file is readable (two transactions must be seen): +--exec $MYSQL_BINLOG --short-form --skip-annotate-row-events $MYSQLD_DATADIR/master-bin.000002 + +--let $case = "B" +--let $description = "one engine has committed its transaction branch" +# Hold off after one engine has committed. +--let $shutdown_timeout=0 +--let $debug_sync_action = "" +# Both debug_sync and debug-dbug are required to make sure Engines remember the commit state +# debug_sync alone will not help. +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4 +--source binlog_truncate_multi_engine.inc + +--let $case = "C" +--let $description= "both engines have committed its transaction branch" +--let $debug_sync_action = "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal" +# Hold off after both engines have committed. The server is shut down. +--let $shutdown_timeout= +--let $restart_parameters = --rpl-semi-sync-slave-enabled=1 +--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4 +--source binlog_truncate_multi_engine.inc + + + +DROP TABLE t1, t2; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test new file mode 100644 index 00000000000..4ea7f9a559e --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test @@ -0,0 +1,77 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs. +# +# ==== References ==== +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); + +SET @@global.max_binlog_size= 4096; + +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM; + +connect(master1,localhost,root,,); +--echo "List of binary logs before rotation" +--source include/show_binary_logs.inc + +# Some load to either non- and transactional egines +# that should not affect the following recovery: +INSERT INTO ti VALUES(1,"I am gonna survive"); +INSERT INTO tm VALUES(1,"me too!"); + +# hold on near engine commit +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go"; +--send INSERT INTO ti VALUES (2, REPEAT("x", 4100)) + +connect(master2,localhost,root,,); +# The 2nd trx for recovery, it does not rotate binlog +SET DEBUG_SYNC= "now WAIT_FOR master1_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +--send INSERT INTO ti VALUES (3, "not gonna survive") + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +--echo "List of binary logs before crash" +--source include/show_binary_logs.inc +--echo # The gtid binlog state prior the crash will be truncated at the end of the test +SELECT @@global.gtid_binlog_state; + +--connection default +--source include/kill_mysqld.inc +--disconnect master1 +--disconnect master2 + +# +# Server restart +# +--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 +--source include/start_mysqld.inc + +# Check error log for a successful truncate message. +let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; + +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=truncated binlog file:.*master.*000002 +--source include/search_pattern_in_file.inc + + +--echo "One record should be present in table" +SELECT * FROM ti; + +--echo # The truncated gtid binlog state +SELECT @@global.gtid_binlog_state; +SELECT @@global.gtid_binlog_pos; + +--echo # Cleanup +DROP TABLE ti; + +--echo # End of the tests diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test new file mode 100644 index 00000000000..04d8619e24e --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test @@ -0,0 +1,119 @@ +# ==== Purpose ==== +# The test verifies attempt to recover by the semisync slave server whose +# binlog is unsafe for truncation. +# +# ==== Implementation ==== +# 2 binlog files are created with the 1st one destined to be the binlog +# checkpoint file for recovery. +# The final group of events is replication unsafe (myisam INSERT). +# Therefore the semisync slave recovery may not. +# +# Steps: +# 0 - Set max_binlog_size= 4096, to help an insert into a +# transaction table 'ti' get binlog rotated while the +# transaction won't be committed, being stopped at +# a prior to commit debug_sync point +# 1 - insert into a non-transactional 'tm' table completes with +# binary logging as well +# 2 - kill and attempt to restart the server as semisync slave that +# must produce an expected unsafe-to-recover error +# 3 - complete the test with a normal restart that successfully finds and +# commits the transaction in doubt. +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server +# + +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc + +SET @@global.max_binlog_size= 4096; + +call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired"); +call mtr.add_suppression("Got an error from unknown thread"); +call mtr.add_suppression("Checking table: '.*tm'"); +call mtr.add_suppression("Recovering table: '.*tm'"); +call mtr.add_suppression("Cannot truncate the binary log to file"); +call mtr.add_suppression("Crash recovery failed"); +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("Found 1 prepared transactions"); +call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed"); +call mtr.add_suppression("Checking table.*tm"); + +RESET MASTER; +FLUSH LOGS; +CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +CREATE TABLE tm (f INT) ENGINE=MYISAM; + +--let $row_count = 5 +--let $i = `select $row_count-2` +--disable_query_log +while ($i) +{ + --eval INSERT INTO ti VALUES ($i, REPEAT("x", 1)) + --dec $i +} +--enable_query_log +INSERT INTO tm VALUES(1); + +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); +connect(master3,localhost,root,,); + +--connection master1 + +# The 1st trx binlogs, rotate binlog and hold on before committing at engine +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go"; +--send_eval INSERT INTO ti VALUES ($row_count - 1, REPEAT("x", 4100)) + +--connection master2 + +# The 2nd trx for recovery, it does not rotate binlog +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go"; +--send_eval INSERT INTO ti VALUES ($row_count, REPEAT("x", 1)) + +--connection master3 +SET DEBUG_SYNC= "now WAIT_FOR master2_ready"; +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready"; +--send INSERT INTO tm VALUES (2) + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR master3_ready"; +--echo # The gtid binlog state prior the crash must be restored at the end of the test; +SELECT @@global.gtid_binlog_state; +--source include/kill_mysqld.inc + +# +# Server restarts +# +--echo # Failed restart as the semisync slave +--error 1 +--exec $MYSQLD_LAST_CMD --rpl-semi-sync-slave-enabled=1 >> $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1 + +--echo # Normal restart +--source include/start_mysqld.inc + +# Check error log for correct messages. +let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err; + +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=Cannot truncate the binary log to file +--source include/search_pattern_in_file.inc + +--echo # Proof that the in-doubt transactions are recovered by the 2nd normal server restart +--eval SELECT COUNT(*) = $row_count as 'True' FROM ti +# myisam table may require repair (which is not tested here) +--disable_warnings +SELECT COUNT(*) <= 1 FROM tm; +--enable_warnings + +--echo # The gtid binlog state prior the crash is restored now +SELECT @@GLOBAL.gtid_binlog_state; +SELECT @@GLOBAL.gtid_binlog_pos; + +--echo # Cleanup +DROP TABLE ti, tm; +--echo End of test diff --git a/mysql-test/suite/mariabackup/include/have_rocksdb.inc b/mysql-test/suite/mariabackup/include/have_rocksdb.inc deleted file mode 100644 index d59f76f6cf3..00000000000 --- a/mysql-test/suite/mariabackup/include/have_rocksdb.inc +++ /dev/null @@ -1,4 +0,0 @@ -if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME = 'rocksdb'`) -{ - --skip Requires rocksdb -}
\ No newline at end of file diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result new file mode 100644 index 00000000000..233f4acbcc0 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result @@ -0,0 +1,129 @@ +include/master-slave.inc +[connection master] +connection server_2; +include/stop_slave.inc +connection server_1; +RESET MASTER; +SET @@global.max_binlog_size= 4096; +connection server_2; +RESET MASTER; +SET @@global.max_binlog_size= 4096; +set @@global.rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos = ""; +CHANGE MASTER TO master_use_gtid= slave_pos; +include/start_slave.inc +connection server_1; +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set @@global.rpl_semi_sync_master_enabled = 1; +set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("1 client is using or hasn.t closed the table properly"); +call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired"); +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); +connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (2, REPEAT("x", 4100)); +connection server_1; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +connection server_2; +include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-1-8 +# restart: --rpl-semi-sync-slave-enabled=1 +connection server_1; +FOUND 1 /truncated binlog file:.*master.*000001/ in mysqld.1.err +disconnect conn_client; +connection server_2; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +connection server_1; +CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +include/start_slave.inc +connection server_2; +INSERT INTO t1 VALUES (3, 'dummy3'); +# The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-9 +connection server_1; +SELECT COUNT(*) = 3 as 'true' FROM t1; +true +1 +# ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-2-9 +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-9 +connection server_2; +connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,; +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (4, REPEAT("x", 4100)); +connect conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go"; +INSERT INTO t1 VALUES (5, REPEAT("x", 4100)); +connection server_2; +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +# Kill the server +connection server_1; +include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; +@@GLOBAL.gtid_current_pos +0-2-11 +# restart: --rpl-semi-sync-slave-enabled=1 +connection server_2; +NOT FOUND /truncated binlog file:.*slave.*000001/ in mysqld.2.err +disconnect conn_client; +connection server_1; +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; +connection server_2; +CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +include/start_slave.inc +connection server_1; +INSERT INTO t1 VALUES (6, 'Done'); +# The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-1-12 +connection server_2; +SELECT COUNT(*) = 6 as 'true' FROM t1; +true +1 +# ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-1-12 +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-1-12 +include/diff_tables.inc [server_1:t1, server_2:t1] +# Cleanup +connection server_1; +DROP TABLE t1; +connection server_2; +include/stop_slave.inc +connection server_1; +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +RESET SLAVE; +RESET MASTER; +connection server_2; +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +include/start_slave.inc +connection default; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc new file mode 100644 index 00000000000..1f24c42f680 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc @@ -0,0 +1,77 @@ +if ($failover_to_slave) +{ + --let $server_to_crash=1 + --let $server_to_promote=2 + --let $new_master_port=$SERVER_MYPORT_2 + --let $client_port=$SERVER_MYPORT_1 + + --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,) +} +if (!$failover_to_slave) +{ + --let $server_to_crash=2 + --let $server_to_promote=1 + --let $new_master_port=$SERVER_MYPORT_1 + --let $client_port=$SERVER_MYPORT_2 + + --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,) +} + + +# Hold insert after write to binlog and before "run_commit_ordered" in engine + +SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go"; +--send_eval $query_to_crash + +# complicate recovery with an extra binlog file +if (!$failover_to_slave) +{ + --connect (conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,) + # use the same signal with $query_to_crash + SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; + SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go"; + --send_eval $query2_to_crash +} + +--connection server_$server_to_crash +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--source include/kill_mysqld.inc + +--connection server_$server_to_promote +--error 2003 +--source include/stop_slave.inc +SELECT @@GLOBAL.gtid_current_pos; + +--let $restart_parameters=--rpl-semi-sync-slave-enabled=1 +--let $allow_rpl_inited=1 +--source include/start_mysqld.inc + +--connection server_$server_to_crash +--enable_reconnect +--source include/wait_until_connected_again.inc + +# Check error log for correct messages. +let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.$server_to_crash.err; +--let SEARCH_FILE=$log_error_ +--let SEARCH_PATTERN=$log_search_pattern +--source include/search_pattern_in_file.inc + +--disconnect conn_client + +# +# FAIL OVER now to new master +# +--connection server_$server_to_promote +set global rpl_semi_sync_master_enabled = 1; +set global rpl_semi_sync_master_wait_point=AFTER_SYNC; + +--connection server_$server_to_crash +--let $master_port=$SERVER_MYPORT_2 +if (`select $server_to_crash = 2`) +{ + --let $master_port=$SERVER_MYPORT_1 +} +evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS; +set global rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; +--source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf new file mode 100644 index 00000000000..f8312bdc5b8 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf @@ -0,0 +1,11 @@ +!include suite/rpl/rpl_1slave_base.cnf +!include include/default_client.cnf + + +[mysqld.1] +log-slave-updates +gtid-strict-mode=1 + +[mysqld.2] +log-slave-updates +gtid-strict-mode=1 diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test new file mode 100644 index 00000000000..2c38cf4da54 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test @@ -0,0 +1,144 @@ +# ==== Purpose ==== +# +# Test verifies replication failover scenario. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Having two servers 1 and 2 enable semi-sync replication with +# with the master wait 'after_sync'. +# 1 - Insert a row. While inserting second row simulate +# a server crash at once the transaction is written to binlog, flushed +# and synced but the binlog position is not updated. +# 2 - Post crash-recovery on the old master execute there CHANGE MASTER +# TO command to connect to server id 2. +# 3 - The old master new slave server 1 must connect to the new +# master server 2. +# 4 - repeat the above to crash the new master and restore in role the old one +# +# ==== References ==== +# +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server + + +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +# Initial slave +--connection server_2 +--source include/stop_slave.inc + +# Initial master +--connection server_1 +RESET MASTER; +SET @@global.max_binlog_size= 4096; + +--connection server_2 +RESET MASTER; +SET @@global.max_binlog_size= 4096; +set @@global.rpl_semi_sync_slave_enabled = 1; +set @@global.gtid_slave_pos = ""; +CHANGE MASTER TO master_use_gtid= slave_pos; +--source include/start_slave.inc + + +--connection server_1 +ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; +set @@global.rpl_semi_sync_master_enabled = 1; +set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; + +call mtr.add_suppression("Can.t init tc log"); +call mtr.add_suppression("Aborting"); +call mtr.add_suppression("1 client is using or hasn.t closed the table properly"); +call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired"); + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, 'dummy1'); + +# +# CRASH the original master, and FAILOVER to the new +# + +# value 1 for server id 1 -> 2 failover +--let $failover_to_slave=1 +--let $query_to_crash= INSERT INTO t1 VALUES (2, REPEAT("x", 4100)) +--let $log_search_pattern=truncated binlog file:.*master.*000001 +--source rpl_semi_sync_crash.inc + +--connection server_2 +--let $rows_so_far=3 +--eval INSERT INTO t1 VALUES ($rows_so_far, 'dummy3') +--save_master_pos +--echo # The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_1 +--sync_with_master +--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--echo # ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_2 +# +# CRASH the new master and FAILOVER back to the original +# + +# value 0 for the reverse server id 2 -> 1 failover +--let $failover_to_slave=0 +--let $query_to_crash = INSERT INTO t1 VALUES (4, REPEAT("x", 4100)) +--let $query2_to_crash= INSERT INTO t1 VALUES (5, REPEAT("x", 4100)) +--let $log_search_pattern=truncated binlog file:.*slave.*000001 +--source rpl_semi_sync_crash.inc + +--connection server_1 +--let $rows_so_far=6 +--eval INSERT INTO t1 VALUES ($rows_so_far, 'Done') +--save_master_pos +--echo # The gtid state on current master must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + +--connection server_2 +--sync_with_master +--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1 +--echo # ... the gtid states on the slave: +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_pos'; + + +--let $diff_tables=server_1:t1, server_2:t1 +--source include/diff_tables.inc + +# +--echo # Cleanup +# +--connection server_1 +DROP TABLE t1; +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc + +--connection server_1 +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; +RESET SLAVE; +RESET MASTER; + +--connection server_2 +set @@global.rpl_semi_sync_master_enabled = 0; +set @@global.rpl_semi_sync_slave_enabled = 0; +set @@global.rpl_semi_sync_master_wait_point=default; + +evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no; +--source include/start_slave.inc + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--source include/rpl_end.inc diff --git a/sql/handler.cc b/sql/handler.cc index fc2c46395c3..78ee18a4542 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1508,6 +1508,24 @@ uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info) return rw_ha_count; } +/* + Returns counted number of + read-write recoverable transaction participants. +*/ +uint ha_count_rw_2pc(THD *thd, bool all) +{ + unsigned rw_ha_count= 0; + THD_TRANS *trans=all ? &thd->transaction->all : &thd->transaction->stmt; + + for (Ha_trx_info * ha_info= trans->ha_list; ha_info; + ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write() && ha_info->ht()->recover) + ++rw_ha_count; + } + return rw_ha_count; +} + /** Check if we can skip the two-phase commit. @@ -1527,7 +1545,6 @@ uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info) engines with read-write changes. */ -static uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, bool all) @@ -1978,6 +1995,24 @@ int ha_commit_one_phase(THD *thd, bool all) DBUG_RETURN(res); } +static bool is_ro_1pc_trans(THD *thd, Ha_trx_info *ha_info, bool all, + bool is_real_trans) +{ + uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all); + bool rw_trans= is_real_trans && + (rw_ha_count > (thd->is_current_stmt_binlog_disabled()?0U:1U)); + + return !rw_trans; +} + +static bool has_binlog_hton(Ha_trx_info *ha_info) +{ + bool rc; + for (rc= false; ha_info && !rc; ha_info= ha_info->next()) + rc= ha_info->ht() == binlog_hton; + + return rc; +} static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) @@ -1991,9 +2026,17 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans) if (ha_info) { + int err; + + if (has_binlog_hton(ha_info) && + (err= binlog_commit(thd, all, + is_ro_1pc_trans(thd, ha_info, all, is_real_trans)))) + { + my_error(ER_ERROR_DURING_COMMIT, MYF(0), err); + error= 1; + } for (; ha_info; ha_info= ha_info_next) { - int err; handlerton *ht= ha_info->ht(); if ((err= ht->commit(ht, thd, all))) { @@ -2219,6 +2262,15 @@ int ha_commit_or_rollback_by_xid(XID *xid, bool commit) xaop.xid= xid; xaop.result= 1; + /* + When the binlogging service is enabled complete the transaction + by it first. + */ + if (commit) + binlog_commit_by_xid(binlog_hton, xid); + else + binlog_rollback_by_xid(binlog_hton, xid); + plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &xaop); @@ -2314,7 +2366,7 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len) recover() step of xa. @note - there are three modes of operation: + there are four modes of operation: - automatic recover after a crash in this case commit_list != 0, tc_heuristic_recover==0 all xids from commit_list are committed, others are rolled back @@ -2325,6 +2377,9 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len) - no recovery (MySQL did not detect a crash) in this case commit_list==0, tc_heuristic_recover == 0 there should be no prepared transactions in this case. + - automatic recovery for the semisync slave server: uncommitted + transactions are rolled back and when they are in binlog it gets + truncated to the first uncommitted transaction start offset. */ struct xarecover_st { @@ -2332,8 +2387,181 @@ struct xarecover_st XID *list; HASH *commit_list; bool dry_run; + MEM_ROOT *mem_root; + bool error; +}; + +/** + Inserts a new hash member. + + returns a successfully created and inserted @c xid_recovery_member + into hash @c hash_arg, + or NULL. +*/ +static xid_recovery_member* +xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root) +{ + xid_recovery_member *member= (xid_recovery_member*) + alloc_root(ptr_mem_root, sizeof(xid_recovery_member)); + if (!member) + return NULL; + + member->xid= xid_arg; + member->in_engine_prepare= 1; + member->decided_to_commit= false; + + return my_hash_insert(hash_arg, (uchar*) member) ? NULL : member; +} + +/* + Inserts a new or updates an existing hash member to increment + the member's prepare counter. + + returns false on success, + true otherwise. +*/ +static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg, + MEM_ROOT *ptr_mem_root) +{ + xid_recovery_member* member; + if ((member= (xid_recovery_member *) + my_hash_search(hash_arg, (uchar *)& xid_arg, sizeof(xid_arg)))) + member->in_engine_prepare++; + else + member= xid_member_insert(hash_arg, xid_arg, ptr_mem_root); + + return member == NULL; +} + +/* + A "transport" type for recovery completion with ha_recover_complete() +*/ +struct xarecover_complete_arg +{ + xid_recovery_member* member; + Binlog_offset *binlog_coord; + uint count; }; +/* + Flagged to commit member confirms to get committed. + Otherwise when + A. ptr_commit_max is NULL (implies the normal recovery), or + B. it's not NULL (can only be so in the semisync slave case) + and the value referenced is not greater than the member's coordinate + the decision is to rollback. + When both A,B do not hold - which is the semisync slave recovery + case - the decision is to commit. + + Returns true as commmit decision + false as rollback one +*/ +static bool xarecover_decide_to_commit(xid_recovery_member* member, + Binlog_offset *ptr_commit_max) +{ + return + member->decided_to_commit ? true : + !ptr_commit_max ? false : + (member->binlog_coord < *ptr_commit_max ? // semisync slave recovery + true : false); +} + +/* + Helper function for xarecover_do_commit_or_rollback_handlerton. + For a given hton decides what to do with a xid passed in the 2nd arg + and carries out the decision. +*/ +static void xarecover_do_commit_or_rollback(handlerton *hton, + xarecover_complete_arg *arg) +{ + xid_t x; + my_bool rc; + xid_recovery_member *member= arg->member; + Binlog_offset *ptr_commit_max= arg->binlog_coord; + + x.set(member->xid); + + rc= xarecover_decide_to_commit(member, ptr_commit_max) ? + hton->commit_by_xid(hton, &x) : hton->rollback_by_xid(hton, &x); + + /* + It's fine to have non-zero rc which would be from transaction + non-participant hton:s. + */ + DBUG_ASSERT(rc || member->in_engine_prepare > 0); + + if (!rc) + { + /* + This block relies on Engine to report XAER_NOTA at + "complete"_by_xid for unknown xid. + */ + member->in_engine_prepare--; + if (global_system_variables.log_warnings > 2) + sql_print_information("%s transaction with xid %llu", + member->decided_to_commit ? "Committed" : + "Rolled back", (ulonglong) member->xid); + } +} + +/* + Per hton recovery decider function. +*/ +static my_bool xarecover_do_commit_or_rollback_handlerton(THD *unused, + plugin_ref plugin, + void *arg) +{ + handlerton *hton= plugin_hton(plugin); + + if (hton->recover) + { + xarecover_do_commit_or_rollback(hton, (xarecover_complete_arg *) arg); + } + + return FALSE; +} + +/* + Completes binlog recovery for an input xid in the passed + member_arg to invoke decider functions for each handlerton. + + Returns always FALSE. +*/ +static my_bool xarecover_complete_and_count(void *member_arg, + void *param_arg) +{ + xid_recovery_member *member= (xid_recovery_member*) member_arg; + xarecover_complete_arg *complete_params= + (xarecover_complete_arg*) param_arg; + complete_params->member= member; + + (void) plugin_foreach(NULL, xarecover_do_commit_or_rollback_handlerton, + MYSQL_STORAGE_ENGINE_PLUGIN, complete_params); + + if (member->in_engine_prepare) + { + complete_params->count++; + if (global_system_variables.log_warnings > 2) + sql_print_warning("Found prepared transaction with xid %llu", + (ulonglong) member->xid); + } + + return false; +} + +/* + Completes binlog recovery to invoke decider functions for + each xid. + Returns the number of transactions remained doubtful. +*/ +uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord) +{ + xarecover_complete_arg complete= { NULL, coord, 0 }; + (void) my_hash_iterate(commit_list, xarecover_complete_and_count, &complete); + + return complete.count; +} + static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, void *arg) { @@ -2395,7 +2623,20 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, info->found_my_xids++; continue; } - // recovery mode + + /* + Regular and semisync slave server recovery only collects + xids to make decisions on them later by the caller. + */ + if (info->mem_root) + { + if (xid_member_replace(info->commit_list, x, info->mem_root)) + { + info->error= true; + sql_print_error("Error in memory allocation at xarecover_handlerton"); + break; + } + } else if (IF_WSREP((wsrep_emulate_bin_log && wsrep_is_wsrep_xid(info->list + i) && x <= wsrep_limit), false) || @@ -2431,7 +2672,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin, return FALSE; } -int ha_recover(HASH *commit_list) +int ha_recover(HASH *commit_list, MEM_ROOT *arg_mem_root) { struct xarecover_st info; DBUG_ENTER("ha_recover"); @@ -2439,6 +2680,8 @@ int ha_recover(HASH *commit_list) info.commit_list= commit_list; info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0); info.list= NULL; + info.mem_root= arg_mem_root; + info.error= false; /* commit_list and tc_heuristic_recover cannot be set both */ DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0); @@ -2483,6 +2726,9 @@ int ha_recover(HASH *commit_list) info.found_my_xids, opt_tc_log_file); DBUG_RETURN(1); } + if (info.error) + DBUG_RETURN(1); + if (info.commit_list) sql_print_information("Crash table recovery finished."); DBUG_RETURN(0); diff --git a/sql/handler.h b/sql/handler.h index a7c455ae7c9..d2bb514f5cf 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -44,6 +44,7 @@ #include <mysql/psi/mysql_table.h> #include "sql_sequence.h" #include "mem_root_array.h" +#include <utility> // pair class Alter_info; class Virtual_column_info; @@ -931,6 +932,32 @@ struct xid_t { }; typedef struct xid_t XID; +/* + Enumerates a sequence in the order of + their creation that is in the top-down order of the index file. + Ranges from zero through MAX_binlog_id. + Not confuse the value with the binlog file numerical suffix, + neither with the binlog file line in the binlog index file. +*/ +typedef uint Binlog_file_id; +const Binlog_file_id MAX_binlog_id= UINT_MAX; +/* + Compound binlog-id and byte offset of transaction's first event + in a sequence (e.g the recovery sequence) of binlog files. + Binlog_offset(0,0) is the minimum value to mean + the first byte of the first binlog file. +*/ +typedef std::pair<Binlog_file_id, my_off_t> Binlog_offset; + +/* binlog-based recovery transaction descriptor */ +struct xid_recovery_member +{ + my_xid xid; + uint in_engine_prepare; // number of engines that have xid prepared + bool decided_to_commit; + Binlog_offset binlog_coord; // semisync recovery binlog offset +}; + /* for recover() handlerton call */ #define MIN_XID_LIST_SIZE 128 #define MAX_XID_LIST_SIZE (1024*128) @@ -5320,7 +5347,8 @@ int ha_commit_one_phase(THD *thd, bool all); int ha_commit_trans(THD *thd, bool all); int ha_rollback_trans(THD *thd, bool all); int ha_prepare(THD *thd); -int ha_recover(HASH *commit_list); +int ha_recover(HASH *commit_list, MEM_ROOT *mem_root= NULL); +uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord= NULL); /* transactions: these functions never call handlerton functions directly */ int ha_enable_transaction(THD *thd, bool on); @@ -5448,4 +5476,8 @@ int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info); int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table); uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info); bool non_existing_table_error(int error); +uint ha_count_rw_2pc(THD *thd, bool all); +uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list, + bool all); + #endif /* HANDLER_INCLUDED */ diff --git a/sql/log.cc b/sql/log.cc index f1fef44e05f..047d644e5da 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -63,6 +63,12 @@ #include "wsrep_trans_observer.h" #endif /* WITH_WSREP */ +#ifdef HAVE_REPLICATION +#include "semisync_master.h" +#include "semisync_slave.h" +#include <utility> // pair +#endif + /* max size of the log message */ #define MAX_LOG_BUFFER_SIZE 1024 #define MAX_TIME_SIZE 32 @@ -88,16 +94,12 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv); static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv); static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton, THD *thd); -static int binlog_commit(handlerton *hton, THD *thd, bool all); static int binlog_rollback(handlerton *hton, THD *thd, bool all); static int binlog_prepare(handlerton *hton, THD *thd, bool all); -static int binlog_xa_recover_dummy(handlerton *hton, XID *xid_list, uint len); -static int binlog_commit_by_xid(handlerton *hton, XID *xid); -static int binlog_rollback_by_xid(handlerton *hton, XID *xid); static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd); static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, bool using_stmt, - bool using_trx); + bool using_trx, bool is_ro_1pc); static const LEX_CSTRING write_error_msg= { STRING_WITH_LEN("error writing to the binary log") }; @@ -1693,17 +1695,13 @@ int binlog_init(void *p) binlog_hton->savepoint_rollback= binlog_savepoint_rollback; binlog_hton->savepoint_rollback_can_release_mdl= binlog_savepoint_rollback_can_release_mdl; - binlog_hton->commit= binlog_commit; + binlog_hton->commit= [](handlerton *, THD *thd, bool all) { return 0; }; binlog_hton->rollback= binlog_rollback; binlog_hton->drop_table= [](handlerton *, const char*) { return -1; }; if (WSREP_ON || opt_bin_log) { binlog_hton->prepare= binlog_prepare; binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot; - binlog_hton->commit_by_xid= binlog_commit_by_xid; - binlog_hton->rollback_by_xid= binlog_rollback_by_xid; - // recover needs to be set to make xa{commit,rollback}_handlerton effective - binlog_hton->recover= binlog_xa_recover_dummy; } binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN | HTON_NO_ROLLBACK; return 0; @@ -1770,7 +1768,7 @@ static int binlog_close_connection(handlerton *hton, THD *thd) static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, bool using_stmt, - bool using_trx) + bool using_trx, bool is_ro_1pc= false) { int error= 0; DBUG_ENTER("binlog_flush_cache"); @@ -1797,7 +1795,8 @@ binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr, */ error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr, end_ev, all, - using_stmt, using_trx); + using_stmt, using_trx, + is_ro_1pc); } else { @@ -1871,7 +1870,8 @@ inline size_t serialize_with_xid(XID *xid, char *buf, nonzero if an error pops up when flushing the cache. */ static inline int -binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr) +binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr, + bool ro_1pc) { DBUG_ENTER("binlog_commit_flush_trx_cache"); @@ -1892,7 +1892,7 @@ binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr) } Query_log_event end_evt(thd, buf, buflen, TRUE, TRUE, TRUE, 0); - DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE)); + DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE, ro_1pc)); } @@ -2011,35 +2011,30 @@ inline bool is_preparing_xa(THD *thd) static int binlog_prepare(handlerton *hton, THD *thd, bool all) { /* Do nothing unless the transaction is a user XA. */ - return is_preparing_xa(thd) ? binlog_commit(NULL, thd, all) : 0; -} - - -static int binlog_xa_recover_dummy(handlerton *hton __attribute__((unused)), - XID *xid_list __attribute__((unused)), - uint len __attribute__((unused))) -{ - /* Does nothing. */ - return 0; + return is_preparing_xa(thd) ? binlog_commit(thd, all, FALSE) : 0; } -static int binlog_commit_by_xid(handlerton *hton, XID *xid) +int binlog_commit_by_xid(handlerton *hton, XID *xid) { THD *thd= current_thd; + if (thd->is_current_stmt_binlog_disabled()) + return 0; (void) thd->binlog_setup_trx_data(); DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT); - return binlog_commit(hton, thd, TRUE); + return binlog_commit(thd, TRUE, FALSE); } -static int binlog_rollback_by_xid(handlerton *hton, XID *xid) +int binlog_rollback_by_xid(handlerton *hton, XID *xid) { THD *thd= current_thd; + if (thd->is_current_stmt_binlog_disabled()) + return 0; (void) thd->binlog_setup_trx_data(); DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_ROLLBACK || @@ -2123,20 +2118,17 @@ static int binlog_commit_flush_xa_prepare(THD *thd, bool all, return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE)); } - /** This function is called once after each statement. It has the responsibility to flush the caches to the binary log on commits. - @param hton The binlog handlerton. @param thd The client thread that executes the transaction. @param all This is @c true if this is a real transaction commit, and @false otherwise. - - @see handlerton::commit + @param ro_1pc read-only one-phase commit transaction */ -static int binlog_commit(handlerton *hton, THD *thd, bool all) +int binlog_commit(THD *thd, bool all, bool ro_1pc) { int error= 0; PSI_stage_info org_stage; @@ -2168,7 +2160,6 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) YESNO(thd->transaction->all.modified_non_trans_table), YESNO(thd->transaction->stmt.modified_non_trans_table))); - thd->backup_stage(&org_stage); THD_STAGE_INFO(thd, stage_binlog_write); if (!cache_mngr->stmt_cache.empty()) @@ -2197,8 +2188,9 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) { error= is_preparing_xa(thd) ? binlog_commit_flush_xa_prepare(thd, all, cache_mngr) : - binlog_commit_flush_trx_cache (thd, all, cache_mngr); + binlog_commit_flush_trx_cache (thd, all, cache_mngr, ro_1pc); } + /* This is part of the stmt rollback. */ @@ -6228,7 +6220,8 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, bool MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, - bool is_transactional, uint64 commit_id) + bool is_transactional, uint64 commit_id, + bool has_xid, bool is_ro_1pc) { rpl_gtid gtid; uint32 domain_id; @@ -6281,7 +6274,7 @@ MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone, Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone, LOG_EVENT_SUPPRESS_USE_F, is_transactional, - commit_id); + commit_id, has_xid, is_ro_1pc); /* Write the event to the binary log. */ DBUG_ASSERT(this == &mysql_bin_log); @@ -7610,7 +7603,8 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, bool using_stmt_cache, - bool using_trx_cache) + bool using_trx_cache, + bool is_ro_1pc) { group_commit_entry entry; Ha_trx_info *ha_info; @@ -7639,6 +7633,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, entry.using_trx_cache= using_trx_cache; entry.need_unlog= is_preparing_xa(thd); ha_info= all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list; + entry.ro_1pc= is_ro_1pc; for (; !entry.need_unlog && ha_info; ha_info= ha_info->next()) { @@ -8513,10 +8508,13 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry, uint64 commit_id) { binlog_cache_mngr *mngr= entry->cache_mngr; + bool has_xid= entry->end_event->get_type_code() == XID_EVENT; + DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt"); if (write_gtid_event(entry->thd, is_prepared_xa(entry->thd), - entry->using_trx_cache, commit_id)) + entry->using_trx_cache, commit_id, + has_xid, entry->ro_1pc)) DBUG_RETURN(ER_ERROR_ON_WRITE); if (entry->using_stmt_cache && !mngr->stmt_cache.empty() && @@ -9247,6 +9245,11 @@ TC_LOG::run_commit_ordered(THD *thd, bool all) if (!ht->commit_ordered) continue; ht->commit_ordered(ht, thd, all); + DBUG_EXECUTE_IF("enable_log_write_upto_crash", + { + DBUG_SET_INITIAL("+d,crash_after_log_write_upto"); + sleep(1000); + }); DEBUG_SYNC(thd, "commit_after_run_commit_ordered"); } } @@ -9979,6 +9982,151 @@ int TC_LOG::using_heuristic_recover() /****** transaction coordinator log for 2pc - binlog() based solution ******/ #define TC_LOG_BINLOG MYSQL_BIN_LOG +/** + Truncates the current binlog to specified position. Removes the rest of binlogs + which are present after this binlog file. + + @param truncate_file Holds the binlog name to be truncated + @param truncate_pos Position within binlog from where it needs to + truncated. + + @retval true ok + @retval false error + +*/ +bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *file_name, + my_off_t pos, + rpl_gtid *ptr_gtid) +{ + int error= 0; +#ifdef HAVE_REPLICATION + LOG_INFO log_info; + THD *thd= current_thd; + my_off_t index_file_offset= 0; + File file= -1; + MY_STAT s; + my_off_t old_size; + + if ((error= find_log_pos(&log_info, file_name, 1))) + { + sql_print_error("Failed to locate binary log file:%s." + "Error:%d", file_name, error); + goto end; + } + + while (!(error= find_next_log(&log_info, 1))) + { + if (!index_file_offset) + { + index_file_offset= log_info.index_file_start_offset; + if ((error= open_purge_index_file(TRUE))) + { + sql_print_error("Failed to open purge index " + "file:%s. Error:%d", purge_index_file_name, error); + goto end; + } + } + if ((error= register_purge_index_entry(log_info.log_file_name))) + { + sql_print_error("Failed to copy %s to purge index" + " file. Error:%d", log_info.log_file_name, error); + goto end; + } + } + + if (error != LOG_INFO_EOF) + { + sql_print_error("Failed to find the next binlog to " + "add to purge index register. Error:%d", error); + goto end; + } + + if (is_inited_purge_index_file()) + { + if (!index_file_offset) + index_file_offset= log_info.index_file_start_offset; + + if ((error= sync_purge_index_file())) + { + sql_print_error("Failed to flush purge index " + "file. Error:%d", error); + goto end; + } + + // Trim index file + error= mysql_file_chsize(index_file.file, index_file_offset, '\n', + MYF(MY_WME)); + if (!error) + error= mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)); + if (error) + { + sql_print_error("Failed to truncate binlog index " + "file:%s to offset:%llu. Error:%d", index_file_name, + index_file_offset, error); + goto end; + } + + /* Reset data in old index cache */ + if ((error= reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1))) + { + sql_print_error("Failed to reinit binlog index " + "file. Error:%d", error); + goto end; + } + + /* Read each entry from purge_index_file and delete the file. */ + if ((error= purge_index_entry(thd, NULL, TRUE))) + { + sql_print_error("Failed to process registered " + "files that would be purged."); + goto end; + } + } + + DBUG_ASSERT(pos); + + if ((file= mysql_file_open(key_file_binlog, file_name, + O_RDWR | O_BINARY, MYF(MY_WME))) < 0) + { + error= 1; + sql_print_error("Failed to open binlog file:%s for " + "truncation.", file_name); + goto end; + } + my_stat(file_name, &s, MYF(0)); + old_size= s.st_size; + clear_inuse_flag_when_closing(file); + /* Change binlog file size to truncate_pos */ + error= mysql_file_chsize(file, pos, 0, MYF(MY_WME)); + if (!error) + error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)); + if (error) + { + sql_print_error("Failed to truncate the " + "binlog file:%s to size:%llu. Error:%d", + file_name, pos, error); + goto end; + } + else + { + char buf[21]; + longlong10_to_str(ptr_gtid->seq_no, buf, 10); + sql_print_information("Successfully truncated binlog file:%s " + "from previous file size %llu " + "to pos:%llu to remove transactions starting from " + "GTID %u-%u-%s", + file_name, old_size, pos, + ptr_gtid->domain_id, ptr_gtid->server_id, buf); + } + +end: + if (file >= 0) + mysql_file_close(file, MYF(MY_WME)); + + error= error || close_purge_index_file(); +#endif + return error > 0; +} int TC_LOG_BINLOG::open(const char *opt_name) { int error= 1; @@ -10454,7 +10602,520 @@ start_binlog_background_thread() return 0; } +#ifdef HAVE_REPLICATION +class Recovery_context +{ +public: + my_off_t prev_event_pos; + rpl_gtid last_gtid; + bool last_gtid_standalone; + bool last_gtid_valid; + bool last_gtid_no2pc; // true when the group does not end with Xid event + uint last_gtid_engines; + Binlog_offset last_gtid_coord; // <binlog id, binlog offset> + /* + When true, it's semisync slave recovery mode + rolls back transactions in doubt and wipes them off from binlog. + The rest of declarations deal with this type of recovery. + */ + bool do_truncate; + /* + transaction-in-doubt's gtid:s. `truncate_gtid` is the ultimate value, + if it's non-zero truncation is taking place to start from it. + Its value gets refined throughout binlog scanning conducted with at most + 2 rounds. + When an estimate is done in the 1st round of 2-round recovery its value + gets memorized for possible adoption as the ultimate `truncate_gtid`. + */ + rpl_gtid truncate_gtid, truncate_gtid_1st_round; + /* + the last non-transactional group that is located in binlog + behind truncate_gtid. + */ + rpl_gtid binlog_unsafe_gtid; + char binlog_truncate_file_name[FN_REFLEN] ; + char binlog_unsafe_file_name[FN_REFLEN] ; + /* + When do_truncate is true, the truncate position may not be + found in one round when recovered transactions are multi-engine + or just on different engines. + In the single recoverable engine case `truncate_reset_done` and + therefore `truncate_validated` remains `false` when the last + binlog is the binlog-checkpoint one. + The meaning of `truncate_reset_done` is according to the following example: + Let round = 1, Binlog contains the sequence of replication event groups: + [g1, G2, g3] + where `G` (in capital) stands for committed, `g` for prepared. + g1 is first set as truncation candidate, then G2 reset it to indicate + the actual truncation is behind (to the right of) it. + `truncate_validated` is set to true when `binlog_truncate_pos` (as of `g3`) + won't change. + Observe last_gtid_valid is affected, so in the above example `g1` that + was initially ignored for the gtid binlog state now seeing `G2` + would have to be added to it. See gtid_maybe_to_truncate. + */ + bool truncate_validated; // trued when the truncate position settled + bool truncate_reset_done; // trued when the position is to reevaluate + /* Flags the fact of truncate position estimation is done the 1st round */ + bool truncate_set_in_1st; + /* + Monotonically indexes binlog files in the recovery list. + When the list is "likely" singleton the value is UINT_MAX. + Otherwise enumeration starts with zero for the first file, increments + by one for any next file except for the last file in the list, which + is also the initial binlog file for recovery, + that is enumberated with UINT_MAX. + */ + Binlog_file_id id_binlog; + enum_binlog_checksum_alg checksum_alg; + Binlog_offset binlog_truncate_coord, + binlog_truncate_coord_1st_round; // pair is similar to truncate_gtid + Binlog_offset binlog_unsafe_coord; + /* + Populated at decide_or_assess() with gtid-in-doubt whose + binlog offset greater of equal by that of the current gtid truncate + candidate. + Gets empited by reset_truncate_coord into gtid binlog state. + */ + Dynamic_array<rpl_gtid> *gtid_maybe_to_truncate; + Recovery_context(); + ~Recovery_context() { delete gtid_maybe_to_truncate; } + /* + Completes the recovery procedure. + In the normal case prepared xids gets committed when they also found + in binlog, otherwise they are rolled back. + In the semisync slave case the xids that are located in binlog in + a truncated tail get rolled back, otherwise they are committed. + Both decisions are contingent on safety to truncate. + */ + bool complete(MYSQL_BIN_LOG *log, HASH &xids); + + /* + decides on commit of xid passed through member argument. + In the semisync slave case it assigns binlog coordinate to + any xid that remains in-doubt. Decision on them will be + done after binlog scan rounds. + */ + bool decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos); + + /* + Assigns last_gtid and assesses the maximum (in the binlog offset term) + unsafe gtid (group of events). + */ + void process_gtid(int round, Gtid_log_event *gev, LOG_INFO *linfo); + + /* + Compute next action at the end of processing of the current binlog file. + It may increment the round. + When the round turns in the semisync-slave recovery + binlog_id, truncate_validated, truncate_reset_done + gets reset/set for the next round. + Within the 2nd round id_binlog keeps incrementing. + + Passed arguments: + round the current round that *may* be increment here + last_log_name the recovery starting binlog file + binlog_checkpoint_name + binlog checkpoint file + linfo binlog file list struct for next file + log pointer to mysql_bin_log instance + + Returns: 0 when rounds continue, maybe the current one remains + 1 when all rounds are done + */ + int next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, MYSQL_BIN_LOG *log); + /* + Relates to the semisync recovery. + Returns true when truncated tail does not contain non-transactional + group of events. + Otherwise returns false. + */ + bool is_safe_to_truncate() + { + return !do_truncate ? true : + (truncate_gtid.seq_no == 0 || // no truncate + binlog_unsafe_coord < binlog_truncate_coord); // or unsafe is earlier + } + + /* + Relates to the semisync recovery. + Is invoked when a standalone or non-2pc group is detected. + Both are unsafe to truncate in the semisync-slave recovery so + the maximum unsafe coordinate may be updated. + In the non-2pc group case though, *exeptionally*, + the no-engine group is considered safe, to be invalidated + to not contribute to binlog state. + */ + void update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo); + + /* + Relates to the semisync recovery. + Is called when a committed or decided to-commit transaction is detected. + Actions: + truncate_gtid then is set to "nil" as indicated by rpl_gtid::seq_no := 0. + truncate_reset_done takes a note of that fact. + binlog_truncate_coord gets reset to the current gtid offset merely to + "suggest" any potential future truncate gtid must have a greater offset. + gtid_maybe_to_truncate gets emptied into gtid binlog state. + + Returns: + false on success, otherwise + true when OOM at rpl_global_gtid_binlog_state insert + */ + bool reset_truncate_coord(my_off_t pos); + + /* + Sets binlog_truncate_pos to the value of the current transaction's gtid. + In multi-engine case that might be just an assessment to be refined + in the current round and confirmed in a next one. + gtid_maybe_to_truncate receives the current gtid as a new element. + Returns + false on success, otherwise + true when OOM at gtid_maybe_to_truncate append + + */ + bool set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_checksum_alg); +}; + +bool Recovery_context::complete(MYSQL_BIN_LOG *log, HASH &xids) +{ + if (!do_truncate || is_safe_to_truncate()) + { + uint count_in_prepare= + ha_recover_complete(&xids, + !do_truncate ? NULL : + (truncate_gtid.seq_no > 0 ? + &binlog_truncate_coord : &last_gtid_coord)); + + if (count_in_prepare > 0 && global_system_variables.log_warnings > 2) + { + sql_print_warning("Could not complete %u number of transactions.", + count_in_prepare); + return false; // there's later dry run ha_recover() to error out + } + } + + /* Truncation is not done when there's no transaction to roll back */ + if (do_truncate && truncate_gtid.seq_no > 0) + { + if (is_safe_to_truncate()) + { + if (log->truncate_and_remove_binlogs(binlog_truncate_file_name, + binlog_truncate_coord.second, + &truncate_gtid)) + { + sql_print_error("Failed to truncate the binary log to " + "file:%s pos:%llu.", binlog_truncate_file_name, + binlog_truncate_coord.second); + return true; + } + } + else + { + sql_print_error("Cannot truncate the binary log to file:%s " + "pos:%llu as unsafe statement " + "is found at file:%s pos:%llu which is " + "beyond the truncation position;" + "all transactions in doubt are left intact. ", + binlog_truncate_file_name, binlog_truncate_coord.second, + binlog_unsafe_file_name, binlog_unsafe_coord.second); + return true; + } + } + + return false; +} + +Recovery_context::Recovery_context() : + prev_event_pos(0), + last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false), + last_gtid_engines(0), + do_truncate(rpl_semi_sync_slave_enabled), + truncate_validated(false), truncate_reset_done(false), + truncate_set_in_1st(false), id_binlog(MAX_binlog_id), + checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL) +{ + last_gtid_coord= Binlog_offset(0,0); + binlog_truncate_coord= binlog_truncate_coord_1st_round= Binlog_offset(0,0); + binlog_unsafe_coord= Binlog_offset(0,0); + binlog_truncate_file_name[0]= 0; + binlog_unsafe_file_name [0]= 0; + binlog_unsafe_gtid= truncate_gtid= truncate_gtid_1st_round= rpl_gtid(); + if (do_truncate) + gtid_maybe_to_truncate= new Dynamic_array<rpl_gtid>(16, 16); +} + +bool Recovery_context::reset_truncate_coord(my_off_t pos) +{ + DBUG_ASSERT(binlog_truncate_coord.second == 0 || + last_gtid_coord >= binlog_truncate_coord || + truncate_set_in_1st); + // save as backup to restore at next_binlog_or_round when necessary + if (truncate_set_in_1st && truncate_gtid_1st_round.seq_no == 0) + { + truncate_gtid_1st_round= truncate_gtid; + binlog_truncate_coord_1st_round= binlog_truncate_coord; + } + binlog_truncate_coord= Binlog_offset(id_binlog, pos); + truncate_gtid= rpl_gtid(); + truncate_reset_done= true; + for (uint i= 0; i < gtid_maybe_to_truncate->elements(); i++) + { + rpl_gtid gtid= gtid_maybe_to_truncate->at(i); + if (rpl_global_gtid_binlog_state.update_nolock(>id, false)) + return true; + } + gtid_maybe_to_truncate->clear(); + + return false; +} + +bool Recovery_context::set_truncate_coord(LOG_INFO *linfo, int round, + enum_binlog_checksum_alg fd_checksum) +{ + binlog_truncate_coord= last_gtid_coord; + strmake_buf(binlog_truncate_file_name, linfo->log_file_name); + + truncate_gtid= last_gtid; + checksum_alg= fd_checksum; + truncate_set_in_1st= (round == 1); + + return gtid_maybe_to_truncate->append(last_gtid); +} + +bool Recovery_context::decide_or_assess(xid_recovery_member *member, int round, + Format_description_log_event *fdle, + LOG_INFO *linfo, my_off_t pos) +{ + if (member) + { + /* + xid in doubt are resolved as follows: + in_engine_prepare is compared agaist binlogged info to + yield the commit-or-rollback decision in the normal case. + In the semisync-slave recovery the decision is done later + after the binlog scanning has determined the truncation offset. + */ + if (member->in_engine_prepare > last_gtid_engines) + { + char buf[21]; + longlong10_to_str(last_gtid.seq_no, buf, 10); + sql_print_error("Error to recovery multi-engine transaction: " + "the number of engines prepared %u exceeds the " + "respective number %u in its GTID %u-%u-%s " + "located at file:%s pos:%llu", + member->in_engine_prepare, last_gtid_engines, + last_gtid.domain_id, last_gtid.server_id, buf, + linfo->log_file_name, last_gtid_coord.second); + return true; + } + else if (member->in_engine_prepare < last_gtid_engines) + { + DBUG_ASSERT(member->in_engine_prepare > 0); + /* + This is an "unlikely" branch of two or more engines in transaction + that is partially committed, so to complete. + */ + member->decided_to_commit= true; + if (do_truncate) + { + /* Validated truncate at this point can be only in the 2nd round. */ + DBUG_ASSERT(!truncate_validated || + (round == 2 && truncate_set_in_1st && + last_gtid_coord < binlog_truncate_coord)); + /* + Estimated truncate must not be greater than the current one's + offset, unless the turn of the rounds. + */ + DBUG_ASSERT(truncate_validated || + (last_gtid_coord >= binlog_truncate_coord || + (round == 2 && truncate_set_in_1st))); + + if (!truncate_validated && reset_truncate_coord(pos)) + return true; + } + } + else // member->in_engine_prepare == last_gtid_engines + { + if (!do_truncate) // "normal" recovery + { + member->decided_to_commit= true; + } + else + { + member->binlog_coord= last_gtid_coord; + last_gtid_valid= false; + /* + First time truncate position estimate before its validation. + An estimate may change to involve reset_truncate_coord call. + */ + if (!truncate_validated) + { + if (truncate_gtid.seq_no == 0 /* was reset or never set */ || + (truncate_set_in_1st && round == 2 /* reevaluted at round turn */)) + { + if (set_truncate_coord(linfo, round, fdle->checksum_alg)) + return true; + } + else + { + /* Truncate estimate was done ago, this gtid can't improve it. */ + DBUG_ASSERT(last_gtid_coord >= binlog_truncate_coord); + + gtid_maybe_to_truncate->append(last_gtid); + } + + DBUG_ASSERT(member->decided_to_commit == false); // may redecided + } + else + { + /* + binlog truncate was determined, possibly to none, otherwise + its offset greater than that of the current gtid. + */ + DBUG_ASSERT(truncate_gtid.seq_no == 0 || + last_gtid_coord < binlog_truncate_coord); + member->decided_to_commit= true; + } + } + } + } + else if (do_truncate) // "0" < last_gtid_engines + { + /* + Similar to the partial commit branch above. + */ + DBUG_ASSERT(!truncate_validated || last_gtid_coord < binlog_truncate_coord); + DBUG_ASSERT(truncate_validated || + (last_gtid_coord >= binlog_truncate_coord || + (round == 2 && truncate_set_in_1st))); + + if (!truncate_validated && reset_truncate_coord(pos)) + return true; + } + + return false; +} + +void Recovery_context::update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo) +{ + if (!do_truncate) + return; + + if (truncate_gtid.seq_no > 0 && // g1,U2, *not* G1,U2 + last_gtid_coord > binlog_truncate_coord) + { + DBUG_ASSERT(binlog_truncate_coord.second > 0); + /* + Potentially unsafe when the truncate coordinate is not determined, + just detected as unsafe when behind the latter. + */ + if (last_gtid_engines == 0) + { + last_gtid_valid= false; + } + else + { + binlog_unsafe_gtid= last_gtid; + binlog_unsafe_coord= last_gtid_coord; + strmake_buf(binlog_unsafe_file_name, linfo->log_file_name); + } + } +} + +void Recovery_context::process_gtid(int round, Gtid_log_event *gev, + LOG_INFO *linfo) +{ + last_gtid.domain_id= gev->domain_id; + last_gtid.server_id= gev->server_id; + last_gtid.seq_no= gev->seq_no; + last_gtid_engines= gev->extra_engines != UCHAR_MAX ? + gev->extra_engines + 1 : 0; + last_gtid_coord= Binlog_offset(id_binlog, prev_event_pos); + DBUG_ASSERT(!last_gtid_valid); + DBUG_ASSERT(!last_gtid.seq_no == 0); + + if (round == 1 || (do_truncate && !truncate_validated)) + { + DBUG_ASSERT(!last_gtid_valid); + + last_gtid_no2pc= false; + last_gtid_standalone= + (gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false; + if (do_truncate && last_gtid_standalone) + update_binlog_unsafe_coord_if_needed(linfo); + /* Update the binlog state with any 'valid' GTID logged after Gtid_list. */ + last_gtid_valid= true; // may flip at Xid when falls to truncate + } +} + +int Recovery_context::next_binlog_or_round(int& round, + const char *last_log_name, + const char *binlog_checkpoint_name, + LOG_INFO *linfo, + MYSQL_BIN_LOG *log) +{ + if (!strcmp(linfo->log_file_name, last_log_name)) + { + /* Exit the loop now at the end of the current round. */ + DBUG_ASSERT(round <= 2); + + if (do_truncate) + { + truncate_validated= truncate_reset_done; + truncate_reset_done= false; + /* + Restore the 1st round saved estimate if it was not refined in the 2nd. + That can only occur in multiple log files context when the inital file + has a truncation candidate (a `g`) and does not have any commited `G`, + *and* other files (binlog-checkpoint one and so on) do not have any + transaction-in-doubt. + */ + if (truncate_gtid.seq_no == 0 && truncate_set_in_1st) + { + DBUG_ASSERT(truncate_gtid_1st_round.seq_no > 0); + + truncate_gtid= truncate_gtid_1st_round; + binlog_truncate_coord= binlog_truncate_coord_1st_round; + } + } + return 1; + } + else if (round == 1) + { + if (do_truncate) + { + truncate_validated= truncate_reset_done; + if (!truncate_validated) + { + rpl_global_gtid_binlog_state.reset_nolock(); + gtid_maybe_to_truncate->clear(); + } + truncate_reset_done= false; + id_binlog= 0; + } + round++; + } + else if (do_truncate) // binlog looping within round 2 + { + id_binlog++; + + DBUG_ASSERT(id_binlog <= MAX_binlog_id); // the assert is "practical" + } + + DBUG_ASSERT(!do_truncate || id_binlog != MAX_binlog_id || + !strcmp(linfo->log_file_name, binlog_checkpoint_name)); + + return 0; +} +#endif /* Execute recovery of the binary log @@ -10476,16 +11137,25 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, MEM_ROOT mem_root; char binlog_checkpoint_name[FN_REFLEN]; bool binlog_checkpoint_found; - bool first_round; IO_CACHE log; File file= -1; const char *errmsg; #ifdef HAVE_REPLICATION - rpl_gtid last_gtid; - bool last_gtid_standalone= false; - bool last_gtid_valid= false; + Recovery_context ctx; #endif DBUG_ENTER("TC_LOG_BINLOG::recover"); + /* + The for-loop variable is updated by the following rule set: + Initially set to 1. + After the initial binlog file is processed to identify + the Binlog-checkpoint file it is incremented when the latter file + is different from the initial one. Otherwise the only log has been + fully parsed so the for loop exits. + The 2nd round parses all earlier in binlog index order files + starting from the Binlog-checkpoint file. It ends when the initial + binlog file is reached. + */ + int round; if (! fdle->is_valid() || (my_hash_init(key_memory_binlog_recover_exec, &xids, @@ -10501,6 +11171,10 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error + /* finds xids when root is not NULL */ + if (do_xa && ha_recover(&xids, &mem_root)) + goto err1; + /* Scan the binlog for XIDs that need to be committed if still in the prepared stage. @@ -10510,10 +11184,9 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, */ binlog_checkpoint_found= false; - first_round= true; - for (;;) + for (round= 1;;) { - while ((ev= Log_event::read_log_event(first_round ? first_log : &log, + while ((ev= Log_event::read_log_event(round == 1 ? first_log : &log, fdle, opt_master_verify_checksum)) && ev->is_valid()) { @@ -10521,17 +11194,23 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, switch (typ) { case XID_EVENT: + if (do_xa) { - if (do_xa) + xid_recovery_member *member= + (xid_recovery_member*) + my_hash_search(&xids, (uchar*) &static_cast<Xid_log_event*>(ev)->xid, + sizeof(my_xid)); +#ifndef HAVE_REPLICATION { - Xid_log_event *xev=(Xid_log_event *)ev; - uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid, - sizeof(xev->xid)); - if (!x || my_hash_insert(&xids, x)) - goto err2; + if (member) + member->decided_to_commit= true; } - break; +#else + if (ctx.decide_or_assess(member, round, fdle, linfo, ev->log_pos)) + goto err2; +#endif } + break; case QUERY_EVENT: { Query_log_event *query_ev= (Query_log_event*) ev; @@ -10545,10 +11224,18 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, if (!x || my_hash_insert(&ddl_log_ids, x)) goto err2; } +#ifdef HAVE_REPLICATION + if (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback()) + { + ctx.last_gtid_no2pc= true; + ctx.update_binlog_unsafe_coord_if_needed(linfo); + } +#endif break; } case BINLOG_CHECKPOINT_EVENT: - if (first_round && do_xa) + if (round == 1 && do_xa) { size_t dir_len; Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev; @@ -10568,8 +11255,9 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, } } break; +#ifdef HAVE_REPLICATION case GTID_LIST_EVENT: - if (first_round) + if (round == 1 || (ctx.do_truncate && ctx.id_binlog == 0)) { Gtid_list_log_event *glev= (Gtid_list_log_event *)ev; @@ -10579,20 +11267,13 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, } break; -#ifdef HAVE_REPLICATION case GTID_EVENT: - if (first_round) - { - Gtid_log_event *gev= (Gtid_log_event *)ev; - - /* Update the binlog state with any GTID logged after Gtid_list. */ - last_gtid.domain_id= gev->domain_id; - last_gtid.server_id= gev->server_id; - last_gtid.seq_no= gev->seq_no; - last_gtid_standalone= - ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false); - last_gtid_valid= true; - } + ctx.process_gtid(round, (Gtid_log_event *)ev, linfo); + break; + + case XA_PREPARE_LOG_EVENT: + ctx.last_gtid_no2pc= true; // TODO: complete MDEV-21469 that removes this block + ctx.update_binlog_unsafe_coord_if_needed(linfo); break; #endif @@ -10606,27 +11287,31 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, default: /* Nothing. */ break; - } + } // end of switch #ifdef HAVE_REPLICATION - if (last_gtid_valid && - ((last_gtid_standalone && !ev->is_part_of_group(typ)) || - (!last_gtid_standalone && - (typ == XID_EVENT || - typ == XA_PREPARE_LOG_EVENT || - (LOG_EVENT_IS_QUERY(typ) && - (((Query_log_event *)ev)->is_commit() || - ((Query_log_event *)ev)->is_rollback())))))) + if (ctx.last_gtid_valid && + ((ctx.last_gtid_standalone && !ev->is_part_of_group(typ)) || + (!ctx.last_gtid_standalone && + (typ == XID_EVENT || ctx.last_gtid_no2pc)))) { - if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false)) + DBUG_ASSERT(round == 1 || (ctx.do_truncate && !ctx.truncate_validated)); + DBUG_ASSERT(!ctx.last_gtid_no2pc || + (ctx.last_gtid_standalone || + typ == XA_PREPARE_LOG_EVENT || + (LOG_EVENT_IS_QUERY(typ) && + (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback())))); + + if (rpl_global_gtid_binlog_state.update_nolock(&ctx.last_gtid, false)) goto err2; - last_gtid_valid= false; + ctx.last_gtid_valid= false; } + ctx.prev_event_pos= ev->log_pos; #endif - delete ev; ev= NULL; - } + } // end of while /* If the last binlog checkpoint event points to an older log, we have to @@ -10636,11 +11321,10 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, written by an older version of MariaDB (or MySQL) - these always have an (implicit) binlog checkpoint event at the start of the last binlog file. */ - if (first_round) + if (round == 1) { if (!binlog_checkpoint_found) break; - first_round= false; DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004", if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) && 0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name)) @@ -10658,33 +11342,50 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, end_io_cache(&log); mysql_file_close(file, MYF(MY_WME)); file= -1; + /* + NOTE: reading other binlog's FD is necessary for finding out + the checksum status of the respective binlog file. + */ + if (find_next_log(linfo, 1)) + { + sql_print_error("Error reading binlog files during recovery. " + "Aborting."); + goto err2; + } } +#ifdef HAVE_REPLICATION + int rc= ctx.next_binlog_or_round(round, last_log_name, + binlog_checkpoint_name, linfo, this); + if (rc == -1) + goto err2; + else if (rc == 1) + break; // all rounds done +#else if (!strcmp(linfo->log_file_name, last_log_name)) break; // No more files to do + round++; +#endif + if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0) { sql_print_error("%s", errmsg); goto err2; } - /* - We do not need to read the Format_description_log_event of other binlog - files. It is not possible for a binlog checkpoint to span multiple - binlog files written by different versions of the server. So we can use - the first one read for reading from all binlog files. - */ - if (find_next_log(linfo, 1)) - { - sql_print_error("Error reading binlog files during recovery. Aborting."); - goto err2; - } fdle->reset_crypto(); - } + } // end of for if (do_xa) { - if (ha_recover(&xids)) - goto err2; + if (binlog_checkpoint_found) + { +#ifndef HAVE_REPLICATION + if (ha_recover_complete(&xids)) +#else + if (ctx.complete(this, xids)) +#endif + goto err2; + } } if (ddl_log_close_binlogged_events(&ddl_log_ids)) goto err2; @@ -10713,6 +11414,7 @@ err1: } + int MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery) { diff --git a/sql/log.h b/sql/log.h index eaf7cde1c07..d61e4660330 100644 --- a/sql/log.h +++ b/sql/log.h @@ -477,6 +477,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG /* Flag used to optimise around wait_for_prior_commit. */ bool queued_by_other; ulong binlog_id; + bool ro_1pc; // passes the binlog_cache_mngr::ro_1pc value to Gtid ctor }; /* @@ -810,7 +811,8 @@ public: my_bool *with_annotate= 0); // binary log write bool write_transaction_to_binlog(THD *thd, binlog_cache_mngr *cache_mngr, Log_event *end_ev, bool all, - bool using_stmt_cache, bool using_trx_cache); + bool using_stmt_cache, bool using_trx_cache, + bool is_ro_1pc); bool write_incident_already_locked(THD *thd); bool write_incident(THD *thd); @@ -860,6 +862,9 @@ public: int purge_first_log(Relay_log_info* rli, bool included); int set_purge_index_file_name(const char *base_file_name); int open_purge_index_file(bool destroy); + bool truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos, + rpl_gtid *gtid); bool is_inited_purge_index_file(); int close_purge_index_file(); int clean_purge_index_file(); @@ -896,7 +901,8 @@ public: void set_status_variables(THD *thd); bool is_xidlist_idle(); bool write_gtid_event(THD *thd, bool standalone, bool is_transactional, - uint64 commit_id); + uint64 commit_id, + bool has_xid= false, bool ro_1pc= false); int read_state_from_file(); int write_state_to_file(); int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size); @@ -1237,4 +1243,8 @@ class Gtid_list_log_event; const char * get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list); +int binlog_commit(THD *thd, bool all, bool is_ro_1pc); +int binlog_commit_by_xid(handlerton *hton, XID *xid); +int binlog_rollback_by_xid(handlerton *hton, XID *xid); + #endif /* LOG_H */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 9c7c56b1c34..c77059ee8f5 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -53,6 +53,7 @@ #include "sql_digest.h" #include "zlib.h" #include "myisampack.h" +#include <algorithm> #define my_b_write_string(A, B) my_b_write((A), (uchar*)(B), (uint) (sizeof(B) - 1)) @@ -2560,10 +2561,12 @@ Binlog_checkpoint_log_event::Binlog_checkpoint_log_event( Gtid_log_event::Gtid_log_event(const uchar *buf, uint event_len, const Format_description_log_event *description_event) - : Log_event(buf, description_event), seq_no(0), commit_id(0) + : Log_event(buf, description_event), seq_no(0), commit_id(0), + flags_extra(0), extra_engines(0) { uint8 header_size= description_event->common_header_len; uint8 post_header_len= description_event->post_header_len[GTID_EVENT-1]; + const uchar *buf_0= buf; if (event_len < (uint) header_size + (uint) post_header_len || post_header_len < GTID_HEADER_LEN) return; @@ -2597,6 +2600,32 @@ Gtid_log_event::Gtid_log_event(const uchar *buf, uint event_len, memcpy(xid.data, buf, data_length); buf+= data_length; } + + /* the extra flags check and actions */ + if (static_cast<uint>(buf - buf_0) < event_len) + { + flags_extra= *buf++; + /* + extra engines flags presence is identifed by non-zero byte value + at this point + */ + if (flags_extra & FL_EXTRA_MULTI_ENGINE) + { + DBUG_ASSERT(static_cast<uint>(buf - buf_0) < event_len); + + extra_engines= *buf++; + + DBUG_ASSERT(extra_engines > 0); + } + } + /* + the strict '<' part of the assert corresponds to extra zero-padded + trailing bytes, + */ + DBUG_ASSERT(static_cast<uint>(buf - buf_0) <= event_len); + /* and the last of them is tested. */ + DBUG_ASSERT(static_cast<uint>(buf - buf_0) == event_len || + buf_0[event_len - 1] == 0); } diff --git a/sql/log_event.h b/sql/log_event.h index 990d95e1dc0..3adc7a26d93 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -490,6 +490,16 @@ class String; #define LOG_EVENT_IGNORABLE_F 0x80 /** + @def LOG_EVENT_ACCEPT_OWN_F + + Flag sets by the semisync slave for accepting + the same server_id ("own") events which the slave must not have + in its state. Typically such events were never committed by + their originator (this server) and discared at its semisync-slave recovery. +*/ +#define LOG_EVENT_ACCEPT_OWN_F 0x4000 + +/** @def LOG_EVENT_SKIP_REPLICATION_F Flag set by application creating the event (with @@skip_replication); the @@ -3602,6 +3612,13 @@ public: event_mysql_xid_t xid; #endif uchar flags2; + uint flags_extra; // more flags area placed after the regular flags2's one + /* + Number of engine participants in transaction minus 1. + When zero the event does not contain that information. + */ + uint8 extra_engines; + /* Flags2. */ /* FL_STANDALONE is set when there is no terminating COMMIT event. */ @@ -3633,9 +3650,19 @@ public: /* FL_"COMMITTED or ROLLED-BACK"_XA is set for XA transaction. */ static const uchar FL_COMPLETED_XA= 128; + /* Flags_extra. */ + + /* + FL_EXTRA_MULTI_ENGINE is set for event group comprising a transaction + involving multiple storage engines. No flag and extra data are added + to the event when the transaction involves only one engine. + */ + static const uchar FL_EXTRA_MULTI_ENGINE= 1; + #ifdef MYSQL_SERVER Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone, - uint16 flags, bool is_transactional, uint64 commit_id); + uint16 flags, bool is_transactional, uint64 commit_id, + bool has_xid= false, bool is_ro_1pc= false); #ifdef HAVE_REPLICATION void pack_info(Protocol *protocol); virtual int do_apply_event(rpl_group_info *rgi); diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index e216724ca7a..d97e87fc4e9 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -644,7 +644,7 @@ Log_event::do_shall_skip(rpl_group_info *rgi) rli->replicate_same_server_id, rli->slave_skip_counter)); if ((server_id == global_system_variables.server_id && - !rli->replicate_same_server_id) || + !(rli->replicate_same_server_id || (flags & LOG_EVENT_ACCEPT_OWN_F))) || (rli->slave_skip_counter == 1 && rli->is_in_group()) || (flags & LOG_EVENT_SKIP_REPLICATION_F && opt_replicate_events_marked_for_skip != RPL_SKIP_REPLICATE)) @@ -3261,10 +3261,13 @@ bool Binlog_checkpoint_log_event::write() Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, uint32 domain_id_arg, bool standalone, uint16 flags_arg, bool is_transactional, - uint64 commit_id_arg) + uint64 commit_id_arg, bool has_xid, + bool ro_1pc) : Log_event(thd_arg, flags_arg, is_transactional), seq_no(seq_no_arg), commit_id(commit_id_arg), domain_id(domain_id_arg), - flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)) + flags2((standalone ? FL_STANDALONE : 0) | + (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)), + flags_extra(0), extra_engines(0) { cache_type= Log_event::EVENT_NO_CACHE; bool is_tmp_table= thd_arg->lex->stmt_accessed_temp_table(); @@ -3287,15 +3290,40 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg, flags2|= (thd_arg->rgi_slave->gtid_ev_flags2 & (FL_DDL|FL_WAITED)); XID_STATE &xid_state= thd->transaction->xid_state; - if (is_transactional && xid_state.is_explicit_XA() && - (thd->lex->sql_command == SQLCOM_XA_PREPARE || - xid_state.get_state_code() == XA_PREPARED)) + if (is_transactional) { - DBUG_ASSERT(thd->lex->xa_opt != XA_ONE_PHASE); + if (xid_state.is_explicit_XA() && + (thd->lex->sql_command == SQLCOM_XA_PREPARE || + xid_state.get_state_code() == XA_PREPARED)) + { + DBUG_ASSERT(thd->lex->xa_opt != XA_ONE_PHASE); + + flags2|= thd->lex->sql_command == SQLCOM_XA_PREPARE ? + FL_PREPARED_XA : FL_COMPLETED_XA; + xid.set(xid_state.get_xid()); + } + /* count non-zero extra recoverable engines; total = extra + 1 */ + if (has_xid) + { + DBUG_ASSERT(ha_count_rw_2pc(thd_arg, + thd_arg->in_multi_stmt_transaction_mode())); + + extra_engines= + ha_count_rw_2pc(thd_arg, thd_arg->in_multi_stmt_transaction_mode()) - 1; + } + else if (ro_1pc) + { + extra_engines= UCHAR_MAX; + } + else if (thd->lex->sql_command == SQLCOM_XA_PREPARE) + { + DBUG_ASSERT(thd_arg->in_multi_stmt_transaction_mode()); - flags2|= thd->lex->sql_command == SQLCOM_XA_PREPARE ? - FL_PREPARED_XA : FL_COMPLETED_XA; - xid.set(xid_state.get_xid()); + uint8 count= ha_count_rw_2pc(thd_arg, true); + extra_engines= count > 1 ? 0 : UCHAR_MAX; + } + if (extra_engines > 0) + flags_extra|= FL_EXTRA_MULTI_ENGINE; } } @@ -3339,19 +3367,19 @@ Gtid_log_event::peek(const uchar *event_start, size_t event_len, bool Gtid_log_event::write() { - uchar buf[GTID_HEADER_LEN+2+sizeof(XID)]; - size_t write_len; + uchar buf[GTID_HEADER_LEN+2+sizeof(XID) + /* flags_extra: */ 1+4]; + size_t write_len= 13; int8store(buf, seq_no); int4store(buf+8, domain_id); buf[12]= flags2; if (flags2 & FL_GROUP_COMMIT_ID) { - int8store(buf+13, commit_id); + DBUG_ASSERT(write_len + 8 == GTID_HEADER_LEN + 2); + + int8store(buf+write_len, commit_id); write_len= GTID_HEADER_LEN + 2; } - else - write_len= 13; if (flags2 & (FL_PREPARED_XA | FL_COMPLETED_XA)) { @@ -3363,6 +3391,16 @@ Gtid_log_event::write() memcpy(buf+write_len, xid.data, data_length); write_len+= data_length; } + if (flags_extra > 0) + { + buf[write_len]= flags_extra; + write_len++; + } + if (flags_extra & FL_EXTRA_MULTI_ENGINE) + { + buf[write_len]= extra_engines; + write_len++; + } if (write_len < GTID_HEADER_LEN) { diff --git a/sql/slave.cc b/sql/slave.cc index 17c2d59c9d7..e7aa0d4a510 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -6174,6 +6174,15 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) bool is_malloc = false; bool is_rows_event= false; /* + The flag has replicate_same_server_id semantics and is raised to accept + a same-server-id event on the semisync slave, for both the gtid and legacy + connection modes. + Such events can appear as result of this server recovery so the event + was created there and replicated elsewhere right before the crash. At recovery + it could be evicted from the server's binlog. + */ + bool do_accept_own_server_id= false; + /* FD_q must have been prepared for the first R_a event inside get_master_version_and_clock() Show-up of FD:s affects checksum_alg at once because @@ -6234,6 +6243,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) unlock_data_lock= FALSE; goto err; } + DBUG_ASSERT(((uchar) buf[FLAGS_OFFSET] & LOG_EVENT_ACCEPT_OWN_F) == 0); if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 && buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */) @@ -6933,7 +6943,8 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) } else if ((s_id == global_system_variables.server_id && - !mi->rli.replicate_same_server_id) || + !(mi->rli.replicate_same_server_id || + (do_accept_own_server_id= rpl_semi_sync_slave_enabled))) || event_that_should_be_ignored(buf) || /* the following conjunction deals with IGNORE_SERVER_IDS, if set @@ -6993,6 +7004,19 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) } else { + if (do_accept_own_server_id) + { + int2store(const_cast<uchar*>(buf + FLAGS_OFFSET), + uint2korr(buf + FLAGS_OFFSET) | LOG_EVENT_ACCEPT_OWN_F); + if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF) + { + ha_checksum crc= 0; + + crc= my_checksum(crc, (const uchar *) buf, + event_len - BINLOG_CHECKSUM_LEN); + int4store(&buf[event_len - BINLOG_CHECKSUM_LEN], crc); + } + } if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len))) { mi->master_log_pos+= inc_pos; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 125abe334fc..5e64007ef84 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -831,6 +831,7 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key, flush_lock.release(flush_lsn); log_flush_notify(flush_lsn); + DBUG_EXECUTE_IF("crash_after_log_write_upto", DBUG_SUICIDE();); } /** write to the log file up to the last log entry. |