summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/suite/binlog/r/binlog_truncate_active_log.result237
-rw-r--r--mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result189
-rw-r--r--mysql-test/suite/binlog/r/binlog_truncate_multi_log.result53
-rw-r--r--mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result58
-rw-r--r--mysql-test/suite/binlog/t/binlog_truncate_active_log.inc57
-rw-r--r--mysql-test/suite/binlog/t/binlog_truncate_active_log.test102
-rw-r--r--mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc73
-rw-r--r--mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt1
-rw-r--r--mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test60
-rw-r--r--mysql-test/suite/binlog/t/binlog_truncate_multi_log.test77
-rw-r--r--mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test119
-rw-r--r--mysql-test/suite/mariabackup/include/have_rocksdb.inc4
-rw-r--r--mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result129
-rw-r--r--mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc77
-rw-r--r--mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf11
-rw-r--r--mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test144
-rw-r--r--sql/handler.cc256
-rw-r--r--sql/handler.h34
-rw-r--r--sql/log.cc894
-rw-r--r--sql/log.h14
-rw-r--r--sql/log_event.cc31
-rw-r--r--sql/log_event.h29
-rw-r--r--sql/log_event_server.cc68
-rw-r--r--sql/slave.cc26
-rw-r--r--storage/innobase/log/log0log.cc1
25 files changed, 2618 insertions, 126 deletions
diff --git a/mysql-test/suite/binlog/r/binlog_truncate_active_log.result b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result
new file mode 100644
index 00000000000..3ad9f5c560c
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_truncate_active_log.result
@@ -0,0 +1,237 @@
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+RESET MASTER;
+CREATE TABLE t (f INT) ENGINE=INNODB;
+CREATE TABLE t2 (f INT) ENGINE=INNODB;
+CREATE TABLE tm (f INT) ENGINE=Aria;
+# Case A.
+connect master1,localhost,root,,;
+connect master2,localhost,root,,;
+connect master3,localhost,root,,;
+connection default;
+INSERT INTO t VALUES (10);
+INSERT INTO tm VALUES (10);
+connection master1;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives";
+INSERT INTO t VALUES (20);
+connection master2;
+SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
+DELETE FROM t2 WHERE f = 0 /* no such record */;
+connection master3;
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+SELECT @@global.gtid_binlog_pos as 'Before the crash';
+Before the crash
+0-1-7
+connection default;
+# Kill the server
+disconnect master1;
+disconnect master2;
+disconnect master3;
+# restart: --rpl-semi-sync-slave-enabled=1
+FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-6/ in mysqld.1.err
+Pre-crash binlog file content:
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Gtid # # GTID #-#-#
+master-bin.000001 # Query # # use `test`; CREATE TABLE t (f INT) ENGINE=INNODB
+master-bin.000001 # Gtid # # GTID #-#-#
+master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (f INT) ENGINE=INNODB
+master-bin.000001 # Gtid # # GTID #-#-#
+master-bin.000001 # Query # # use `test`; CREATE TABLE tm (f INT) ENGINE=Aria
+master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Query # # use `test`; INSERT INTO t VALUES (10)
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Gtid # # BEGIN GTID #-#-#
+master-bin.000001 # Query # # use `test`; INSERT INTO tm VALUES (10)
+master-bin.000001 # Query # # COMMIT
+SELECT @@global.gtid_binlog_pos as 'After the crash';
+After the crash
+0-1-5
+"One row should be present in table 't'"
+SELECT * FROM t;
+f
+10
+DELETE FROM t;
+# Case B.
+connect master1,localhost,root,,;
+connect master2,localhost,root,,;
+connect master3,localhost,root,,;
+connection default;
+INSERT INTO t VALUES (10);
+INSERT INTO tm VALUES (10);
+connection master1;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives";
+DELETE FROM t2 WHERE f = 0;
+connection master2;
+SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
+INSERT INTO t VALUES (20);
+connection master3;
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+SELECT @@global.gtid_binlog_pos as 'Before the crash';
+Before the crash
+0-1-10
+connection default;
+# Kill the server
+disconnect master1;
+disconnect master2;
+disconnect master3;
+# restart: --rpl-semi-sync-slave-enabled=1
+FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-10/ in mysqld.1.err
+Pre-crash binlog file content:
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Gtid # # BEGIN GTID #-#-#
+master-bin.000002 # Query # # use `test`; DELETE FROM t
+master-bin.000002 # Xid # # COMMIT /* XID */
+master-bin.000002 # Gtid # # BEGIN GTID #-#-#
+master-bin.000002 # Query # # use `test`; INSERT INTO t VALUES (10)
+master-bin.000002 # Xid # # COMMIT /* XID */
+master-bin.000002 # Gtid # # BEGIN GTID #-#-#
+master-bin.000002 # Query # # use `test`; INSERT INTO tm VALUES (10)
+master-bin.000002 # Query # # COMMIT
+master-bin.000002 # Gtid # # BEGIN GTID #-#-#
+master-bin.000002 # Query # # use `test`; DELETE FROM t2 WHERE f = 0
+master-bin.000002 # Query # # COMMIT
+SELECT @@global.gtid_binlog_pos as 'After the crash';
+After the crash
+0-1-9
+"One row should be present in table 't'"
+SELECT * FROM t;
+f
+10
+DELETE FROM t;
+# Case C.
+CREATE PROCEDURE sp_blank_xa()
+BEGIN
+XA START 'blank';
+DELETE FROM t2 WHERE f = 0 /* no such record */;
+XA END 'blank';
+XA PREPARE 'blank';
+END|
+connect master1,localhost,root,,;
+connect master2,localhost,root,,;
+connect master3,localhost,root,,;
+connection default;
+INSERT INTO t VALUES (10);
+INSERT INTO tm VALUES (10);
+connection master1;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives";
+INSERT INTO t VALUES (20);
+connection master2;
+SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
+CALL sp_blank_xa;
+connection master3;
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+SELECT @@global.gtid_binlog_pos as 'Before the crash';
+Before the crash
+0-1-15
+connection default;
+# Kill the server
+disconnect master1;
+disconnect master2;
+disconnect master3;
+# restart: --rpl-semi-sync-slave-enabled=1
+FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-14/ in mysqld.1.err
+Pre-crash binlog file content:
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000003 # Gtid # # BEGIN GTID #-#-#
+master-bin.000003 # Query # # use `test`; DELETE FROM t
+master-bin.000003 # Xid # # COMMIT /* XID */
+master-bin.000003 # Gtid # # GTID #-#-#
+master-bin.000003 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `sp_blank_xa`()
+BEGIN
+XA START 'blank';
+DELETE FROM t2 WHERE f = 0 /* no such record */;
+XA END 'blank';
+XA PREPARE 'blank';
+END
+master-bin.000003 # Gtid # # BEGIN GTID #-#-#
+master-bin.000003 # Query # # use `test`; INSERT INTO t VALUES (10)
+master-bin.000003 # Xid # # COMMIT /* XID */
+master-bin.000003 # Gtid # # BEGIN GTID #-#-#
+master-bin.000003 # Query # # use `test`; INSERT INTO tm VALUES (10)
+master-bin.000003 # Query # # COMMIT
+SELECT @@global.gtid_binlog_pos as 'After the crash';
+After the crash
+0-1-13
+"One row should be present in table 't'"
+SELECT * FROM t;
+f
+10
+DELETE FROM t;
+DROP PROCEDURE sp_blank_xa;
+# Case D.
+CREATE PROCEDURE sp_xa()
+BEGIN
+XA START 'xid';
+DELETE FROM t WHERE f = 10;
+XA END 'xid';
+XA PREPARE 'xid';
+END|
+connect master1,localhost,root,,;
+connect master2,localhost,root,,;
+connect master3,localhost,root,,;
+connection default;
+INSERT INTO t VALUES (10);
+INSERT INTO tm VALUES (10);
+connection master1;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives";
+CALL sp_xa;
+connection master2;
+SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
+INSERT INTO t2 VALUES (20);
+connection master3;
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+SELECT @@global.gtid_binlog_pos as 'Before the crash';
+Before the crash
+0-1-20
+connection default;
+# Kill the server
+disconnect master1;
+disconnect master2;
+disconnect master3;
+# restart: --rpl-semi-sync-slave-enabled=1
+FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-20/ in mysqld.1.err
+Pre-crash binlog file content:
+include/show_binlog_events.inc
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000004 # Gtid # # BEGIN GTID #-#-#
+master-bin.000004 # Query # # use `test`; DELETE FROM t
+master-bin.000004 # Xid # # COMMIT /* XID */
+master-bin.000004 # Gtid # # GTID #-#-#
+master-bin.000004 # Query # # use `test`; DROP PROCEDURE sp_blank_xa
+master-bin.000004 # Gtid # # GTID #-#-#
+master-bin.000004 # Query # # use `test`; CREATE DEFINER=`root`@`localhost` PROCEDURE `sp_xa`()
+BEGIN
+XA START 'xid';
+DELETE FROM t WHERE f = 10;
+XA END 'xid';
+XA PREPARE 'xid';
+END
+master-bin.000004 # Gtid # # BEGIN GTID #-#-#
+master-bin.000004 # Query # # use `test`; INSERT INTO t VALUES (10)
+master-bin.000004 # Xid # # COMMIT /* XID */
+master-bin.000004 # Gtid # # BEGIN GTID #-#-#
+master-bin.000004 # Query # # use `test`; INSERT INTO tm VALUES (10)
+master-bin.000004 # Query # # COMMIT
+master-bin.000004 # Gtid # # XA START X'786964',X'',1 GTID #-#-#
+master-bin.000004 # Query # # use `test`; DELETE FROM t WHERE f = 10
+master-bin.000004 # Query # # XA END X'786964',X'',1
+master-bin.000004 # XA_prepare # # XA PREPARE X'786964',X'',1
+SELECT @@global.gtid_binlog_pos as 'After the crash';
+After the crash
+0-1-19
+"One row should be present in table 't'"
+SELECT * FROM t;
+f
+10
+DELETE FROM t;
+DROP PROCEDURE sp_xa;
+# Cleanup
+DROP TABLE t,t2,tm;
+# End of the tests
diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result
new file mode 100644
index 00000000000..b8fd04497f2
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_engine.result
@@ -0,0 +1,189 @@
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb;
+#
+#
+# Case "A" : "neither engine committed => rollback & binlog truncate"
+#
+RESET MASTER;
+FLUSH LOGS;
+SET GLOBAL max_binlog_size= 4096;
+connect con1,localhost,root,,;
+List of binary logs before rotation
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+INSERT INTO t1 VALUES (1, REPEAT("x", 1));
+INSERT INTO t2 VALUES (1, REPEAT("x", 1));
+BEGIN;
+INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal";
+COMMIT;
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+List of binary logs after rotation
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+# restart the server with --rpl-semi-sync-slave-enabled=1
+# the server is restarted
+# restart: --rpl-semi-sync-slave-enabled=1
+connection default;
+#
+# *** Summary: 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3:
+#
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+1
+SELECT @@GLOBAL.gtid_binlog_state;
+@@GLOBAL.gtid_binlog_state
+0-1-2
+SELECT @@GLOBAL.gtid_binlog_pos;
+@@GLOBAL.gtid_binlog_pos
+0-1-2
+List of binary logs at the end of the tests
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+# ***
+DELETE FROM t1;
+DELETE FROM t2;
+disconnect con1;
+#
+Proof of the truncated binlog file is readable (two transactions must be seen):
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/;
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+START TRANSACTION
+/*!*/;
+COMMIT/*!*/;
+START TRANSACTION
+/*!*/;
+COMMIT/*!*/;
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
+#
+#
+# Case "B" : "one engine has committed its transaction branch"
+#
+RESET MASTER;
+FLUSH LOGS;
+SET GLOBAL max_binlog_size= 4096;
+connect con1,localhost,root,,;
+List of binary logs before rotation
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+INSERT INTO t1 VALUES (1, REPEAT("x", 1));
+INSERT INTO t2 VALUES (1, REPEAT("x", 1));
+SET GLOBAL debug_dbug="d,enable_log_write_upto_crash";
+BEGIN;
+INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+COMMIT;
+connection default;
+# restart: --rpl-semi-sync-slave-enabled=1
+connection default;
+#
+# *** Summary: 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4:
+#
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+2
+SELECT @@GLOBAL.gtid_binlog_state;
+@@GLOBAL.gtid_binlog_state
+0-1-3
+SELECT @@GLOBAL.gtid_binlog_pos;
+@@GLOBAL.gtid_binlog_pos
+0-1-3
+List of binary logs at the end of the tests
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+master-bin.000004 #
+# ***
+DELETE FROM t1;
+DELETE FROM t2;
+disconnect con1;
+#
+#
+#
+# Case "C" : "both engines have committed its transaction branch"
+#
+RESET MASTER;
+FLUSH LOGS;
+SET GLOBAL max_binlog_size= 4096;
+connect con1,localhost,root,,;
+List of binary logs before rotation
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+INSERT INTO t1 VALUES (1, REPEAT("x", 1));
+INSERT INTO t2 VALUES (1, REPEAT("x", 1));
+BEGIN;
+INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+SET DEBUG_SYNC= "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal";
+COMMIT;
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+List of binary logs after rotation
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+# restart the server with --rpl-semi-sync-slave-enabled=1
+# the server is restarted
+# restart: --rpl-semi-sync-slave-enabled=1
+connection default;
+#
+# *** Summary: 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4:
+#
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+2
+SELECT @@GLOBAL.gtid_binlog_state;
+@@GLOBAL.gtid_binlog_state
+0-1-3
+SELECT @@GLOBAL.gtid_binlog_pos;
+@@GLOBAL.gtid_binlog_pos
+0-1-3
+List of binary logs at the end of the tests
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+master-bin.000004 #
+# ***
+DELETE FROM t1;
+DELETE FROM t2;
+disconnect con1;
+#
+DROP TABLE t1, t2;
+# End of the tests
diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result
new file mode 100644
index 00000000000..e11a40feb2f
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result
@@ -0,0 +1,53 @@
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+SET @@global.max_binlog_size= 4096;
+RESET MASTER;
+FLUSH LOGS;
+CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM;
+connect master1,localhost,root,,;
+"List of binary logs before rotation"
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+INSERT INTO ti VALUES(1,"I am gonna survive");
+INSERT INTO tm VALUES(1,"me too!");
+SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go";
+INSERT INTO ti VALUES (2, REPEAT("x", 4100));
+connect master2,localhost,root,,;
+SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go";
+INSERT INTO ti VALUES (3, "not gonna survive");
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+"List of binary logs before crash"
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+# The gtid binlog state prior the crash will be truncated at the end of the test
+SELECT @@global.gtid_binlog_state;
+@@global.gtid_binlog_state
+0-1-6
+connection default;
+# Kill the server
+disconnect master1;
+disconnect master2;
+# restart: --rpl-semi-sync-slave-enabled=1
+FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err
+"One record should be present in table"
+SELECT * FROM ti;
+a b
+1 I am gonna survive
+# The truncated gtid binlog state
+SELECT @@global.gtid_binlog_state;
+@@global.gtid_binlog_state
+0-1-4
+SELECT @@global.gtid_binlog_pos;
+@@global.gtid_binlog_pos
+0-1-4
+# Cleanup
+DROP TABLE ti;
+# End of the tests
diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result
new file mode 100644
index 00000000000..0ee9a7c871d
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log_unsafe.result
@@ -0,0 +1,58 @@
+SET @@global.max_binlog_size= 4096;
+call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired");
+call mtr.add_suppression("Got an error from unknown thread");
+call mtr.add_suppression("Checking table: '.*tm'");
+call mtr.add_suppression("Recovering table: '.*tm'");
+call mtr.add_suppression("Cannot truncate the binary log to file");
+call mtr.add_suppression("Crash recovery failed");
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+call mtr.add_suppression("Found 1 prepared transactions");
+call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed");
+call mtr.add_suppression("Checking table.*tm");
+RESET MASTER;
+FLUSH LOGS;
+CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE tm (f INT) ENGINE=MYISAM;
+INSERT INTO tm VALUES(1);
+connect master1,localhost,root,,;
+connect master2,localhost,root,,;
+connect master3,localhost,root,,;
+connection master1;
+SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go";
+INSERT INTO ti VALUES (5 - 1, REPEAT("x", 4100));
+connection master2;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go";
+INSERT INTO ti VALUES (5, REPEAT("x", 1));
+connection master3;
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready";
+INSERT INTO tm VALUES (2);
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
+# The gtid binlog state prior the crash must be restored at the end of the test;
+SELECT @@global.gtid_binlog_state;
+@@global.gtid_binlog_state
+0-1-9
+# Kill the server
+# Failed restart as the semisync slave
+# Normal restart
+# restart
+FOUND 1 /Cannot truncate the binary log to file/ in mysqld.1.err
+# Proof that the in-doubt transactions are recovered by the 2nd normal server restart
+SELECT COUNT(*) = 5 as 'True' FROM ti;
+True
+1
+SELECT COUNT(*) <= 1 FROM tm;
+COUNT(*) <= 1
+1
+# The gtid binlog state prior the crash is restored now
+SELECT @@GLOBAL.gtid_binlog_state;
+@@GLOBAL.gtid_binlog_state
+0-1-9
+SELECT @@GLOBAL.gtid_binlog_pos;
+@@GLOBAL.gtid_binlog_pos
+0-1-9
+# Cleanup
+DROP TABLE ti, tm;
+End of test
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc
new file mode 100644
index 00000000000..0bc83477d91
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc
@@ -0,0 +1,57 @@
+connect(master1,localhost,root,,);
+connect(master2,localhost,root,,);
+connect(master3,localhost,root,,);
+
+--connection default
+
+# First, commit a few transactions
+INSERT INTO t VALUES (10);
+INSERT INTO tm VALUES (10);
+
+--connection master1
+# Hold insert after write to binlog and before "run_commit_ordered" in engine
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives";
+--send_eval $query1
+
+--connection master2
+SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
+--send_eval $query2
+
+--connection master3
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+SELECT @@global.gtid_binlog_pos as 'Before the crash';
+
+--connection default
+--source include/kill_mysqld.inc
+--disconnect master1
+--disconnect master2
+--disconnect master3
+
+#
+# Server restart
+#
+--let $restart_parameters= --rpl-semi-sync-slave-enabled=1
+--source include/start_mysqld.inc
+
+# Check error log for a successful truncate message.
+--let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err
+
+--let SEARCH_FILE=$log_error_
+--let SEARCH_PATTERN=Successfully truncated.*to remove transactions starting from GTID $truncate_gtid_pos
+
+--source include/search_pattern_in_file.inc
+
+--echo Pre-crash binlog file content:
+--let $binlog_file= query_get_value(show binary logs, Log_name, $binlog_file_index)
+--source include/show_binlog_events.inc
+
+SELECT @@global.gtid_binlog_pos as 'After the crash';
+--echo "One row should be present in table 't'"
+SELECT * FROM t;
+
+# prepare binlog file index for the next test
+--inc $binlog_file_index
+
+# Local cleanup
+DELETE FROM t;
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.test b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test
new file mode 100644
index 00000000000..dbba8697b86
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test
@@ -0,0 +1,102 @@
+# ==== Purpose ====
+#
+# Test verifies the truncation of a single binary log file.
+#
+# ==== References ====
+#
+# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server
+
+--source include/have_innodb.inc
+--source include/have_aria.inc
+# File: binlog_truncate_active_log.inc included in test makes use of
+# 'debug_sync' facility.
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_statement.inc
+
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+
+# The following cases are tested:
+# A. 2pc transaction is followed by a blank "zero-engines" one
+# B. 2pc transaction follows the blank one
+# C. Similarly to A, with the XA blank transaction; D. Similarly to B, with the prepared XA transaction
+
+RESET MASTER;
+CREATE TABLE t (f INT) ENGINE=INNODB;
+CREATE TABLE t2 (f INT) ENGINE=INNODB;
+CREATE TABLE tm (f INT) ENGINE=Aria;
+
+# Old (pre-crash) binlog file index initial value.
+# It is incremented at the end of each case.
+--let $binlog_file_index=1
+
+--echo # Case A.
+# Using 'debug_sync', hold 'query1' execution after 'query1' is flushed and
+# synced to binary log but not yet committed. In another connection hold
+# 'query2' execution after 'query2' is flushed and synced to binlog.
+# Crash and restart server with --rpl-semi-sync-slave-enabled=1
+#
+# During recovery of binary log 'query1' status is checked with InnoDB engine,
+# it will be found prepared but not yet committed. All transactions starting
+# from 'query1' onwards will be removed from the binary log.
+# Show-binlog-events is to prove that.
+
+--let $truncate_gtid_pos = 0-1-6
+--let $query1 = INSERT INTO t VALUES (20)
+--let $query2 = DELETE FROM t2 WHERE f = 0 /* no such record */
+--source binlog_truncate_active_log.inc
+
+--echo # Case B.
+# The inverted sequence ends up truncating starting from $query2
+--let $truncate_gtid_pos = 0-1-10
+--let $query1 = DELETE FROM t2 WHERE f = 0
+--let $query2 = INSERT INTO t VALUES (20)
+--source binlog_truncate_active_log.inc
+
+
+--echo # Case C.
+delimiter |;
+CREATE PROCEDURE sp_blank_xa()
+BEGIN
+ XA START 'blank';
+ DELETE FROM t2 WHERE f = 0 /* no such record */;
+ XA END 'blank';
+ XA PREPARE 'blank';
+END|
+delimiter ;|
+
+# The same as in A with $query2 being the zero-engine XA transaction.
+# Both $query1 and $query2 are going to be truncated.
+--let $truncate_gtid_pos = 0-1-14
+--let $query1 = INSERT INTO t VALUES (20)
+--let $query2 = CALL sp_blank_xa
+--source binlog_truncate_active_log.inc
+
+DROP PROCEDURE sp_blank_xa;
+
+
+--echo # Case D.
+delimiter |;
+CREATE PROCEDURE sp_xa()
+BEGIN
+ XA START 'xid';
+ DELETE FROM t WHERE f = 10;
+ XA END 'xid';
+ XA PREPARE 'xid';
+END|
+delimiter ;|
+
+# The same as in B with $query1 being the prepared XA transaction.
+# Truncation must occur at $query2.
+--let $truncate_gtid_pos = 0-1-20
+--let $query1 = CALL sp_xa
+--let $query2 = INSERT INTO t2 VALUES (20)
+--source binlog_truncate_active_log.inc
+
+DROP PROCEDURE sp_xa;
+
+
+--echo # Cleanup
+DROP TABLE t,t2,tm;
+
+--echo # End of the tests
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc
new file mode 100644
index 00000000000..52ce4741eaa
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.inc
@@ -0,0 +1,73 @@
+#
+# Invoked by binlog_truncate_multi_engine.test
+# Parameters:
+# $debug_sync_action describes debug-sync actions
+# $kill_server 1 when to crash, 0 for regular restart
+# $restart_parameters the caller may simulate partial commit at recovery
+# $test_outcome summary of expected results
+# $MYSQLD_DATADIR
+
+--echo #
+--echo #
+--echo # Case $case : $description
+--echo #
+RESET MASTER;
+FLUSH LOGS;
+SET GLOBAL max_binlog_size= 4096;
+
+connect(con1,localhost,root,,);
+--echo List of binary logs before rotation
+--source include/show_binary_logs.inc
+INSERT INTO t1 VALUES (1, REPEAT("x", 1));
+INSERT INTO t2 VALUES (1, REPEAT("x", 1));
+if (`SELECT $case = "B"`)
+{
+ --write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait-binlog_truncate_multi_engine.test
+EOF
+
+ SET GLOBAL debug_dbug="d,enable_log_write_upto_crash";
+}
+BEGIN;
+ INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
+ INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+
+if (`SELECT $debug_sync_action != ""`)
+{
+ --eval SET DEBUG_SYNC= $debug_sync_action
+}
+send COMMIT;
+
+--connection default
+if (`SELECT $case = "B"`)
+{
+ --source include/wait_until_disconnected.inc
+ --source include/start_mysqld.inc
+}
+if (`SELECT $case != "B"`)
+{
+ SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+ --echo List of binary logs after rotation
+ --source include/show_binary_logs.inc
+
+ --echo # restart the server with $restart_parameters
+ --echo # the server is restarted
+ --source include/restart_mysqld.inc
+}
+
+--connection default
+--echo #
+--echo # *** Summary: $test_outcome:
+--echo #
+SELECT COUNT(*) FROM t1;
+SELECT COUNT(*) FROM t2;
+SELECT @@GLOBAL.gtid_binlog_state;
+SELECT @@GLOBAL.gtid_binlog_pos;
+--echo List of binary logs at the end of the tests
+--source include/show_binary_logs.inc
+--echo # ***
+# cleanup
+DELETE FROM t1;
+DELETE FROM t2;
+--disconnect con1
+--echo #
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt
new file mode 100644
index 00000000000..03e7d74f6db
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.opt
@@ -0,0 +1 @@
+--plugin-load=$HA_ROCKSDB_SO
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test
new file mode 100644
index 00000000000..f8e32f16f0f
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_engine.test
@@ -0,0 +1,60 @@
+# ==== Purpose ====
+#
+# Test verifies truncation of multiple binary logs with multiple transactional
+# storage engines
+#
+# ==== References ====
+#
+# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server
+
+--source include/have_rocksdb.inc
+--source include/have_innodb.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+
+--let $old_max_binlog_size= `select @@global.max_binlog_size`
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+--let $MYSQLD_DATADIR= `SELECT @@datadir`
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=rocksdb;
+
+--let $case = "A"
+--let $description = "neither engine committed => rollback & binlog truncate"
+# Hold off engine commits after write to binlog and its rotation.
+# The transaction is killed along with the server after that.
+--let $shutdown_timeout=0
+--let $debug_sync_action = "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR signal_no_signal"
+--let $restart_parameters = --rpl-semi-sync-slave-enabled=1
+--let $test_outcome= 1 row should be present in both tables; binlog is truncated; number of binlogs at reconnect - 3
+--source binlog_truncate_multi_engine.inc
+--echo Proof of the truncated binlog file is readable (two transactions must be seen):
+--exec $MYSQL_BINLOG --short-form --skip-annotate-row-events $MYSQLD_DATADIR/master-bin.000002
+
+--let $case = "B"
+--let $description = "one engine has committed its transaction branch"
+# Hold off after one engine has committed.
+--let $shutdown_timeout=0
+--let $debug_sync_action = ""
+# Both debug_sync and debug-dbug are required to make sure Engines remember the commit state
+# debug_sync alone will not help.
+--let $restart_parameters = --rpl-semi-sync-slave-enabled=1
+--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; one extra binlog file compare with A; number of binlogs at reconnect - 4
+--source binlog_truncate_multi_engine.inc
+
+--let $case = "C"
+--let $description= "both engines have committed its transaction branch"
+--let $debug_sync_action = "commit_after_run_commit_ordered SIGNAL con1_ready WAIT_FOR signal_no_signal"
+# Hold off after both engines have committed. The server is shut down.
+--let $shutdown_timeout=
+--let $restart_parameters = --rpl-semi-sync-slave-enabled=1
+--let $test_outcome= 2 rows should be present in both tables; no binlog truncation; the same # of binlog files as in B; number of binlogs at reconnect - 4
+--source binlog_truncate_multi_engine.inc
+
+
+
+DROP TABLE t1, t2;
+
+--echo # End of the tests
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test
new file mode 100644
index 00000000000..4ea7f9a559e
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test
@@ -0,0 +1,77 @@
+# ==== Purpose ====
+#
+# Test verifies truncation of multiple binary logs.
+#
+# ==== References ====
+# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server
+
+--source include/have_innodb.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+
+SET @@global.max_binlog_size= 4096;
+
+RESET MASTER;
+FLUSH LOGS;
+CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE tm (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=MyISAM;
+
+connect(master1,localhost,root,,);
+--echo "List of binary logs before rotation"
+--source include/show_binary_logs.inc
+
+# Some load to both non-transactional and transactional engines
+# that should not affect the following recovery:
+INSERT INTO ti VALUES(1,"I am gonna survive");
+INSERT INTO tm VALUES(1,"me too!");
+
+# hold on near engine commit
+SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR con1_go";
+--send INSERT INTO ti VALUES (2, REPEAT("x", 4100))
+
+connect(master2,localhost,root,,);
+# The 2nd trx for recovery, it does not rotate binlog
+SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go";
+--send INSERT INTO ti VALUES (3, "not gonna survive")
+
+--connection default
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+--echo "List of binary logs before crash"
+--source include/show_binary_logs.inc
+--echo # The gtid binlog state prior the crash will be truncated at the end of the test
+SELECT @@global.gtid_binlog_state;
+
+--connection default
+--source include/kill_mysqld.inc
+--disconnect master1
+--disconnect master2
+
+#
+# Server restart
+#
+--let $restart_parameters= --rpl-semi-sync-slave-enabled=1
+--source include/start_mysqld.inc
+
+# Check error log for a successful truncate message.
+let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err;
+
+--let SEARCH_FILE=$log_error_
+--let SEARCH_PATTERN=truncated binlog file:.*master.*000002
+--source include/search_pattern_in_file.inc
+
+
+--echo "One record should be present in table"
+SELECT * FROM ti;
+
+--echo # The truncated gtid binlog state
+SELECT @@global.gtid_binlog_state;
+SELECT @@global.gtid_binlog_pos;
+
+--echo # Cleanup
+DROP TABLE ti;
+
+--echo # End of the tests
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test
new file mode 100644
index 00000000000..04d8619e24e
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log_unsafe.test
@@ -0,0 +1,119 @@
+# ==== Purpose ====
+# The test verifies attempt to recover by the semisync slave server whose
+# binlog is unsafe for truncation.
+#
+# ==== Implementation ====
+# 2 binlog files are created with the 1st one destined to be the binlog
+# checkpoint file for recovery.
+# The final group of events is replication unsafe (myisam INSERT).
+# Therefore the semisync slave recovery may not truncate the binlog and must fail.
+#
+# Steps:
+# 0 - Set max_binlog_size= 4096, to help an insert into a
+# transaction table 'ti' get binlog rotated while the
+# transaction won't be committed, being stopped at
+# a prior to commit debug_sync point
+# 1 - insert into a non-transactional 'tm' table completes with
+# binary logging as well
+# 2 - kill and attempt to restart the server as semisync slave that
+# must produce an expected unsafe-to-recover error
+# 3 - complete the test with a normal restart that successfully finds and
+# commits the transaction in doubt.
+#
+# ==== References ====
+#
+# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server
+#
+
+--source include/have_innodb.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+
+SET @@global.max_binlog_size= 4096;
+
+call mtr.add_suppression("Table '.*tm' is marked as crashed and should be repaired");
+call mtr.add_suppression("Got an error from unknown thread");
+call mtr.add_suppression("Checking table: '.*tm'");
+call mtr.add_suppression("Recovering table: '.*tm'");
+call mtr.add_suppression("Cannot truncate the binary log to file");
+call mtr.add_suppression("Crash recovery failed");
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+call mtr.add_suppression("Found 1 prepared transactions");
+call mtr.add_suppression("mysqld: Table.*tm.*is marked as crashed");
+call mtr.add_suppression("Checking table.*tm");
+
+RESET MASTER;
+FLUSH LOGS;
+CREATE TABLE ti (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE tm (f INT) ENGINE=MYISAM;
+
+--let $row_count = 5
+--let $i = `select $row_count-2`
+--disable_query_log
+while ($i)
+{
+ --eval INSERT INTO ti VALUES ($i, REPEAT("x", 1))
+ --dec $i
+}
+--enable_query_log
+INSERT INTO tm VALUES(1);
+
+connect(master1,localhost,root,,);
+connect(master2,localhost,root,,);
+connect(master3,localhost,root,,);
+
+--connection master1
+
+# The 1st trx binlogs, rotate binlog and hold on before committing at engine
+SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL master1_ready WAIT_FOR master1_go";
+--send_eval INSERT INTO ti VALUES ($row_count - 1, REPEAT("x", 4100))
+
+--connection master2
+
+# The 2nd trx for recovery, it does not rotate binlog
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT_FOR master2_go";
+--send_eval INSERT INTO ti VALUES ($row_count, REPEAT("x", 1))
+
+--connection master3
+SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
+SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master3_ready";
+--send INSERT INTO tm VALUES (2)
+
+--connection default
+SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
+--echo # The gtid binlog state prior the crash must be restored at the end of the test;
+SELECT @@global.gtid_binlog_state;
+--source include/kill_mysqld.inc
+
+#
+# Server restarts
+#
+--echo # Failed restart as the semisync slave
+--error 1
+--exec $MYSQLD_LAST_CMD --rpl-semi-sync-slave-enabled=1 >> $MYSQLTEST_VARDIR/log/mysqld.1.err 2>&1
+
+--echo # Normal restart
+--source include/start_mysqld.inc
+
+# Check error log for correct messages.
+let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err;
+
+--let SEARCH_FILE=$log_error_
+--let SEARCH_PATTERN=Cannot truncate the binary log to file
+--source include/search_pattern_in_file.inc
+
+--echo # Proof that the in-doubt transactions are recovered by the 2nd normal server restart
+--eval SELECT COUNT(*) = $row_count as 'True' FROM ti
+# myisam table may require repair (which is not tested here)
+--disable_warnings
+SELECT COUNT(*) <= 1 FROM tm;
+--enable_warnings
+
+--echo # The gtid binlog state prior the crash is restored now
+SELECT @@GLOBAL.gtid_binlog_state;
+SELECT @@GLOBAL.gtid_binlog_pos;
+
+--echo # Cleanup
+DROP TABLE ti, tm;
+--echo End of test
diff --git a/mysql-test/suite/mariabackup/include/have_rocksdb.inc b/mysql-test/suite/mariabackup/include/have_rocksdb.inc
deleted file mode 100644
index d59f76f6cf3..00000000000
--- a/mysql-test/suite/mariabackup/include/have_rocksdb.inc
+++ /dev/null
@@ -1,4 +0,0 @@
-if (`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME = 'rocksdb'`)
-{
- --skip Requires rocksdb
-} \ No newline at end of file
diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result
new file mode 100644
index 00000000000..233f4acbcc0
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result
@@ -0,0 +1,129 @@
+include/master-slave.inc
+[connection master]
+connection server_2;
+include/stop_slave.inc
+connection server_1;
+RESET MASTER;
+SET @@global.max_binlog_size= 4096;
+connection server_2;
+RESET MASTER;
+SET @@global.max_binlog_size= 4096;
+set @@global.rpl_semi_sync_slave_enabled = 1;
+set @@global.gtid_slave_pos = "";
+CHANGE MASTER TO master_use_gtid= slave_pos;
+include/start_slave.inc
+connection server_1;
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+set @@global.rpl_semi_sync_master_enabled = 1;
+set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+call mtr.add_suppression("1 client is using or hasn.t closed the table properly");
+call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired");
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+INSERT INTO t1 VALUES (1, 'dummy1');
+connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,;
+SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
+INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+connection server_1;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+# Kill the server
+connection server_2;
+include/stop_slave.inc
+SELECT @@GLOBAL.gtid_current_pos;
+@@GLOBAL.gtid_current_pos
+0-1-8
+# restart: --rpl-semi-sync-slave-enabled=1
+connection server_1;
+FOUND 1 /truncated binlog file:.*master.*000001/ in mysqld.1.err
+disconnect conn_client;
+connection server_2;
+set global rpl_semi_sync_master_enabled = 1;
+set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
+connection server_1;
+CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS;
+set global rpl_semi_sync_slave_enabled = 1;
+set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
+include/start_slave.inc
+connection server_2;
+INSERT INTO t1 VALUES (3, 'dummy3');
+# The gtid state on current master must be equal to ...
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+Variable_name Value
+gtid_binlog_pos 0-2-9
+connection server_1;
+SELECT COUNT(*) = 3 as 'true' FROM t1;
+true
+1
+# ... the gtid states on the slave:
+SHOW VARIABLES LIKE 'gtid_slave_pos';
+Variable_name Value
+gtid_slave_pos 0-2-9
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+Variable_name Value
+gtid_binlog_pos 0-2-9
+connection server_2;
+connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
+SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
+INSERT INTO t1 VALUES (4, REPEAT("x", 4100));
+connect conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go";
+INSERT INTO t1 VALUES (5, REPEAT("x", 4100));
+connection server_2;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+# Kill the server
+connection server_1;
+include/stop_slave.inc
+SELECT @@GLOBAL.gtid_current_pos;
+@@GLOBAL.gtid_current_pos
+0-2-11
+# restart: --rpl-semi-sync-slave-enabled=1
+connection server_2;
+NOT FOUND /truncated binlog file:.*slave.*000001/ in mysqld.2.err
+disconnect conn_client;
+connection server_1;
+set global rpl_semi_sync_master_enabled = 1;
+set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
+connection server_2;
+CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS;
+set global rpl_semi_sync_slave_enabled = 1;
+set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
+include/start_slave.inc
+connection server_1;
+INSERT INTO t1 VALUES (6, 'Done');
+# The gtid state on current master must be equal to ...
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+Variable_name Value
+gtid_binlog_pos 0-1-12
+connection server_2;
+SELECT COUNT(*) = 6 as 'true' FROM t1;
+true
+1
+# ... the gtid states on the slave:
+SHOW VARIABLES LIKE 'gtid_slave_pos';
+Variable_name Value
+gtid_slave_pos 0-1-12
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+Variable_name Value
+gtid_binlog_pos 0-1-12
+include/diff_tables.inc [server_1:t1, server_2:t1]
+# Cleanup
+connection server_1;
+DROP TABLE t1;
+connection server_2;
+include/stop_slave.inc
+connection server_1;
+set @@global.rpl_semi_sync_master_enabled = 0;
+set @@global.rpl_semi_sync_slave_enabled = 0;
+set @@global.rpl_semi_sync_master_wait_point=default;
+RESET SLAVE;
+RESET MASTER;
+connection server_2;
+set @@global.rpl_semi_sync_master_enabled = 0;
+set @@global.rpl_semi_sync_slave_enabled = 0;
+set @@global.rpl_semi_sync_master_wait_point=default;
+CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no;
+include/start_slave.inc
+connection default;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc
new file mode 100644
index 00000000000..1f24c42f680
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc
@@ -0,0 +1,77 @@
+if ($failover_to_slave)
+{
+ --let $server_to_crash=1
+ --let $server_to_promote=2
+ --let $new_master_port=$SERVER_MYPORT_2
+ --let $client_port=$SERVER_MYPORT_1
+
+ --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
+}
+if (!$failover_to_slave)
+{
+ --let $server_to_crash=2
+ --let $server_to_promote=1
+ --let $new_master_port=$SERVER_MYPORT_1
+ --let $client_port=$SERVER_MYPORT_2
+
+ --connect (conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,)
+}
+
+
+# Hold insert after write to binlog and before "run_commit_ordered" in engine
+
+SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
+--send_eval $query_to_crash
+
+# complicate recovery with an extra binlog file
+if (!$failover_to_slave)
+{
+ --connect (conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,)
+ # use the same signal with $query_to_crash
+ SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+ SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go";
+ --send_eval $query2_to_crash
+}
+
+--connection server_$server_to_crash
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+--source include/kill_mysqld.inc
+
+--connection server_$server_to_promote
+--error 2003
+--source include/stop_slave.inc
+SELECT @@GLOBAL.gtid_current_pos;
+
+--let $restart_parameters=--rpl-semi-sync-slave-enabled=1
+--let $allow_rpl_inited=1
+--source include/start_mysqld.inc
+
+--connection server_$server_to_crash
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+# Check error log for correct messages.
+let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.$server_to_crash.err;
+--let SEARCH_FILE=$log_error_
+--let SEARCH_PATTERN=$log_search_pattern
+--source include/search_pattern_in_file.inc
+
+--disconnect conn_client
+
+#
+# FAIL OVER now to new master
+#
+--connection server_$server_to_promote
+set global rpl_semi_sync_master_enabled = 1;
+set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
+
+--connection server_$server_to_crash
+--let $master_port=$SERVER_MYPORT_2
+if (`select $server_to_crash = 2`)
+{
+ --let $master_port=$SERVER_MYPORT_1
+}
+evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS;
+set global rpl_semi_sync_slave_enabled = 1;
+set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
+--source include/start_slave.inc
diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf
new file mode 100644
index 00000000000..f8312bdc5b8
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf
@@ -0,0 +1,11 @@
+!include suite/rpl/rpl_1slave_base.cnf
+!include include/default_client.cnf
+
+
+[mysqld.1]
+log-slave-updates
+gtid-strict-mode=1
+
+[mysqld.2]
+log-slave-updates
+gtid-strict-mode=1
diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test
new file mode 100644
index 00000000000..2c38cf4da54
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test
@@ -0,0 +1,144 @@
+# ==== Purpose ====
+#
+# Test verifies replication failover scenario.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Having two servers 1 and 2 enable semi-sync replication
+# with the master wait point 'after_sync'.
+# 1 - Insert a row. While inserting the second row simulate
+# a server crash as soon as the transaction is written to binlog, flushed
+# and synced but the binlog position is not updated.
+# 2 - Post crash-recovery on the old master execute there CHANGE MASTER
+# TO command to connect to server id 2.
+# 3 - The old master new slave server 1 must connect to the new
+# master server 2.
+# 4 - repeat the above to crash the new master and restore in role the old one
+#
+# ==== References ====
+#
+# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server
+
+
+--source include/have_innodb.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+
+# Initial slave
+--connection server_2
+--source include/stop_slave.inc
+
+# Initial master
+--connection server_1
+RESET MASTER;
+SET @@global.max_binlog_size= 4096;
+
+--connection server_2
+RESET MASTER;
+SET @@global.max_binlog_size= 4096;
+set @@global.rpl_semi_sync_slave_enabled = 1;
+set @@global.gtid_slave_pos = "";
+CHANGE MASTER TO master_use_gtid= slave_pos;
+--source include/start_slave.inc
+
+
+--connection server_1
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+set @@global.rpl_semi_sync_master_enabled = 1;
+set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
+
+call mtr.add_suppression("Can.t init tc log");
+call mtr.add_suppression("Aborting");
+call mtr.add_suppression("1 client is using or hasn.t closed the table properly");
+call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired");
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+INSERT INTO t1 VALUES (1, 'dummy1');
+
+#
+# CRASH the original master, and FAILOVER to the new
+#
+
+# value 1 for server id 1 -> 2 failover
+--let $failover_to_slave=1
+--let $query_to_crash= INSERT INTO t1 VALUES (2, REPEAT("x", 4100))
+--let $log_search_pattern=truncated binlog file:.*master.*000001
+--source rpl_semi_sync_crash.inc
+
+--connection server_2
+--let $rows_so_far=3
+--eval INSERT INTO t1 VALUES ($rows_so_far, 'dummy3')
+--save_master_pos
+--echo # The gtid state on current master must be equal to ...
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+
+--connection server_1
+--sync_with_master
+--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1
+--echo # ... the gtid states on the slave:
+SHOW VARIABLES LIKE 'gtid_slave_pos';
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+
+--connection server_2
+#
+# CRASH the new master and FAILOVER back to the original
+#
+
+# value 0 for the reverse server id 2 -> 1 failover
+--let $failover_to_slave=0
+--let $query_to_crash = INSERT INTO t1 VALUES (4, REPEAT("x", 4100))
+--let $query2_to_crash= INSERT INTO t1 VALUES (5, REPEAT("x", 4100))
+--let $log_search_pattern=truncated binlog file:.*slave.*000001
+--source rpl_semi_sync_crash.inc
+
+--connection server_1
+--let $rows_so_far=6
+--eval INSERT INTO t1 VALUES ($rows_so_far, 'Done')
+--save_master_pos
+--echo # The gtid state on current master must be equal to ...
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+
+--connection server_2
+--sync_with_master
+--eval SELECT COUNT(*) = $rows_so_far as 'true' FROM t1
+--echo # ... the gtid states on the slave:
+SHOW VARIABLES LIKE 'gtid_slave_pos';
+SHOW VARIABLES LIKE 'gtid_binlog_pos';
+
+
+--let $diff_tables=server_1:t1, server_2:t1
+--source include/diff_tables.inc
+
+#
+--echo # Cleanup
+#
+--connection server_1
+DROP TABLE t1;
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+--source include/stop_slave.inc
+
+--connection server_1
+set @@global.rpl_semi_sync_master_enabled = 0;
+set @@global.rpl_semi_sync_slave_enabled = 0;
+set @@global.rpl_semi_sync_master_wait_point=default;
+RESET SLAVE;
+RESET MASTER;
+
+--connection server_2
+set @@global.rpl_semi_sync_master_enabled = 0;
+set @@global.rpl_semi_sync_slave_enabled = 0;
+set @@global.rpl_semi_sync_master_wait_point=default;
+
+evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no;
+--source include/start_slave.inc
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--source include/rpl_end.inc
diff --git a/sql/handler.cc b/sql/handler.cc
index fc2c46395c3..78ee18a4542 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1508,6 +1508,24 @@ uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info)
return rw_ha_count;
}
+/**
+  Counts the read-write transaction participants that are recoverable.
+
+  @param thd  connection whose transaction participants are examined
+  @param all  true to examine the whole transaction list,
+              false to examine the statement one
+
+  @return the number of participants that did read-write work and whose
+          handlerton provides a recover method
+*/
+uint ha_count_rw_2pc(THD *thd, bool all)
+{
+  THD_TRANS *trx= all ? &thd->transaction->all : &thd->transaction->stmt;
+  unsigned n_rw_2pc= 0;
+  Ha_trx_info *cur= trx->ha_list;
+
+  while (cur)
+  {
+    if (cur->is_trx_read_write() && cur->ht()->recover)
+      n_rw_2pc++;
+    cur= cur->next();
+  }
+  return n_rw_2pc;
+}
+
/**
Check if we can skip the two-phase commit.
@@ -1527,7 +1545,6 @@ uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info)
engines with read-write changes.
*/
-static
uint
ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
bool all)
@@ -1978,6 +1995,24 @@ int ha_commit_one_phase(THD *thd, bool all)
DBUG_RETURN(res);
}
+/*
+  Returns true unless this is a real transaction whose number of
+  read-write engines, as reported by ha_check_and_coalesce_trx_read_only(),
+  exceeds one (or zero when binlogging is disabled for the current
+  statement).
+*/
+static bool is_ro_1pc_trans(THD *thd, Ha_trx_info *ha_info, bool all,
+                            bool is_real_trans)
+{
+  /* Always invoked, whatever is_real_trans says. */
+  const uint rw_engines= ha_check_and_coalesce_trx_read_only(thd, ha_info,
+                                                             all);
+  if (!is_real_trans)
+    return true;
+
+  const uint rw_limit= thd->is_current_stmt_binlog_disabled() ? 0U : 1U;
+  return rw_engines <= rw_limit;
+}
+
+/*
+  Returns true when binlog_hton is found in the participant list
+  starting at ha_info, false otherwise (an empty list included).
+*/
+static bool has_binlog_hton(Ha_trx_info *ha_info)
+{
+  for (; ha_info; ha_info= ha_info->next())
+  {
+    if (ha_info->ht() == binlog_hton)
+      return true;
+  }
+  return false;
+}
static int
commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
@@ -1991,9 +2026,17 @@ commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
if (ha_info)
{
+ int err;
+
+ if (has_binlog_hton(ha_info) &&
+ (err= binlog_commit(thd, all,
+ is_ro_1pc_trans(thd, ha_info, all, is_real_trans))))
+ {
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+ error= 1;
+ }
for (; ha_info; ha_info= ha_info_next)
{
- int err;
handlerton *ht= ha_info->ht();
if ((err= ht->commit(ht, thd, all)))
{
@@ -2219,6 +2262,15 @@ int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
xaop.xid= xid;
xaop.result= 1;
+ /*
+ When the binlogging service is enabled complete the transaction
+ by it first.
+ */
+ if (commit)
+ binlog_commit_by_xid(binlog_hton, xid);
+ else
+ binlog_rollback_by_xid(binlog_hton, xid);
+
plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
@@ -2314,7 +2366,7 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len)
recover() step of xa.
@note
- there are three modes of operation:
+ there are four modes of operation:
- automatic recover after a crash
in this case commit_list != 0, tc_heuristic_recover==0
all xids from commit_list are committed, others are rolled back
@@ -2325,6 +2377,9 @@ static my_xid wsrep_order_and_check_continuity(XID *list, int len)
- no recovery (MySQL did not detect a crash)
in this case commit_list==0, tc_heuristic_recover == 0
there should be no prepared transactions in this case.
+ - automatic recovery for the semisync slave server: uncommitted
+ transactions are rolled back and when they are in binlog it gets
+ truncated to the first uncommitted transaction start offset.
*/
struct xarecover_st
{
@@ -2332,8 +2387,181 @@ struct xarecover_st
XID *list;
HASH *commit_list;
bool dry_run;
+ MEM_ROOT *mem_root;
+ bool error;
+};
+
+/**
+  Inserts a new hash member.
+
+  The member is allocated on @c ptr_mem_root and starts with a prepare
+  counter of one, no commit decision, and the binlog coordinate set to
+  the greatest possible value.
+
+  @param hash_arg      the hash to insert into
+  @param xid_arg       the xid of the new member
+  @param ptr_mem_root  the memory root the member is allocated on
+
+  returns a successfully created and inserted @c xid_recovery_member
+  into hash @c hash_arg,
+  or NULL.
+*/
+static xid_recovery_member*
+xid_member_insert(HASH *hash_arg, my_xid xid_arg, MEM_ROOT *ptr_mem_root)
+{
+  xid_recovery_member *member= (xid_recovery_member*)
+    alloc_root(ptr_mem_root, sizeof(xid_recovery_member));
+  if (!member)
+    return NULL;
+
+  member->xid= xid_arg;
+  member->in_engine_prepare= 1;
+  member->decided_to_commit= false;
+  /*
+    No constructor runs on the allocated memory, so every field must be
+    set by hand. A member whose coordinate is never assigned later must
+    not compare less than a commit maximum in
+    xarecover_decide_to_commit(); the greatest value guarantees that.
+  */
+  member->binlog_coord= Binlog_offset(MAX_binlog_id, ~(my_off_t) 0);
+
+  return my_hash_insert(hash_arg, (uchar*) member) ? NULL : member;
+}
+
+/*
+  Increments the prepare counter of the hash member having the given xid,
+  or inserts a new member for that xid when none is found.
+
+  returns false on success,
+  true when a new member could not be created and inserted.
+*/
+static bool xid_member_replace(HASH *hash_arg, my_xid xid_arg,
+                               MEM_ROOT *ptr_mem_root)
+{
+  xid_recovery_member *found= (xid_recovery_member *)
+    my_hash_search(hash_arg, (uchar *) &xid_arg, sizeof(xid_arg));
+
+  if (found)
+  {
+    found->in_engine_prepare++;
+    return false;
+  }
+
+  return xid_member_insert(hash_arg, xid_arg, ptr_mem_root) == NULL;
+}
+
+/*
+  A "transport" type for recovery completion with ha_recover_complete()
+
+  member        the hash member being completed; set for each member by
+                xarecover_complete_and_count()
+  binlog_coord  the coordinate passed to ha_recover_complete(), may be
+                NULL; handed to xarecover_decide_to_commit() as the
+                commit maximum
+  count         the number of members whose in_engine_prepare is still
+                non-zero after all handlertons have been tried
+*/
+struct xarecover_complete_arg
+{
+ xid_recovery_member* member;
+ Binlog_offset *binlog_coord;
+ uint count;
};
+/*
+  Decides the fate of a recovered transaction.
+
+  A member already flagged to commit is committed.
+  Otherwise
+  - when ptr_commit_max is NULL (the normal recovery) the decision is
+    to roll back;
+  - when it is not NULL (the semisync slave recovery) the member is
+    committed only if its binlog coordinate is less than the value
+    referenced, and rolled back otherwise.
+
+  Returns true as the commit decision,
+          false as the rollback one.
+*/
+static bool xarecover_decide_to_commit(xid_recovery_member* member,
+                                       Binlog_offset *ptr_commit_max)
+{
+  if (member->decided_to_commit)
+    return true;
+
+  if (!ptr_commit_max)
+    return false;                                 // normal recovery
+
+  return member->binlog_coord < *ptr_commit_max;  // semisync slave recovery
+}
+
+/*
+  Helper function for xarecover_do_commit_or_rollback_handlerton.
+  For a given hton decides what to do with a xid passed in the 2nd arg
+  and carries out the decision.
+
+  hton  the handlerton asked to complete the transaction
+  arg   carries the hash member to complete and the optional commit
+        maximum coordinate
+
+  On a zero status from the handlerton the member's prepare counter is
+  decremented and, when log_warnings > 2, the action taken is logged.
+*/
+static void xarecover_do_commit_or_rollback(handlerton *hton,
+                                            xarecover_complete_arg *arg)
+{
+  xid_t x;
+  /* The by_xid methods return int; keep the status untruncated. */
+  int rc;
+  xid_recovery_member *member= arg->member;
+  Binlog_offset *ptr_commit_max= arg->binlog_coord;
+
+  x.set(member->xid);
+
+  /*
+    Remember the decision: it may be to commit even though
+    member->decided_to_commit is false (the semisync slave recovery),
+    and the log message below must report what was actually done.
+  */
+  const bool do_commit= xarecover_decide_to_commit(member, ptr_commit_max);
+
+  rc= do_commit ?
+    hton->commit_by_xid(hton, &x) : hton->rollback_by_xid(hton, &x);
+
+  /*
+    It's fine to have non-zero rc which would be from transaction
+    non-participant hton:s.
+  */
+  DBUG_ASSERT(rc || member->in_engine_prepare > 0);
+
+  if (!rc)
+  {
+    /*
+      This block relies on Engine to report XAER_NOTA at
+      "complete"_by_xid for unknown xid.
+    */
+    member->in_engine_prepare--;
+    if (global_system_variables.log_warnings > 2)
+      sql_print_information("%s transaction with xid %llu",
+                            do_commit ? "Committed" : "Rolled back",
+                            (ulonglong) member->xid);
+  }
+}
+
+/*
+  Per hton recovery decider function, a plugin_foreach() callback.
+  A handlerton without a recover method is skipped; any other one is
+  passed, together with arg, to xarecover_do_commit_or_rollback().
+
+  Returns always FALSE.
+*/
+static my_bool xarecover_do_commit_or_rollback_handlerton(THD *unused,
+                                                          plugin_ref plugin,
+                                                          void *arg)
+{
+  handlerton *engine= plugin_hton(plugin);
+
+  if (!engine->recover)
+    return FALSE;
+
+  xarecover_do_commit_or_rollback(engine, (xarecover_complete_arg *) arg);
+  return FALSE;
+}
+
+/*
+  Completes binlog recovery for the xid of the passed member_arg by
+  invoking the decider function for each storage engine handlerton.
+  A member that still has a non-zero prepare counter afterwards is
+  counted in param_arg and, when log_warnings > 2, reported.
+
+  Returns always FALSE.
+*/
+static my_bool xarecover_complete_and_count(void *member_arg,
+                                            void *param_arg)
+{
+  xarecover_complete_arg *params= (xarecover_complete_arg*) param_arg;
+  xid_recovery_member *cur= (xid_recovery_member*) member_arg;
+
+  params->member= cur;
+  (void) plugin_foreach(NULL, xarecover_do_commit_or_rollback_handlerton,
+                        MYSQL_STORAGE_ENGINE_PLUGIN, params);
+
+  if (!cur->in_engine_prepare)
+    return false;
+
+  params->count++;
+  if (global_system_variables.log_warnings > 2)
+    sql_print_warning("Found prepared transaction with xid %llu",
+                      (ulonglong) cur->xid);
+
+  return false;
+}
+
+/*
+  Completes binlog recovery by invoking the decider functions for
+  each xid of commit_list.
+
+  commit_list  the hash of xid_recovery_member to complete
+  coord        the optional commit maximum coordinate, passed on to
+               the decider functions
+
+  Returns the number of transactions remained doubtful.
+*/
+uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord)
+{
+  xarecover_complete_arg params;
+
+  params.member= NULL;
+  params.binlog_coord= coord;
+  params.count= 0;
+
+  (void) my_hash_iterate(commit_list, xarecover_complete_and_count, &params);
+
+  return params.count;
+}
+
static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
void *arg)
{
@@ -2395,7 +2623,20 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
info->found_my_xids++;
continue;
}
- // recovery mode
+
+ /*
+ Regular and semisync slave server recovery only collects
+ xids to make decisions on them later by the caller.
+ */
+ if (info->mem_root)
+ {
+ if (xid_member_replace(info->commit_list, x, info->mem_root))
+ {
+ info->error= true;
+ sql_print_error("Error in memory allocation at xarecover_handlerton");
+ break;
+ }
+ } else
if (IF_WSREP((wsrep_emulate_bin_log &&
wsrep_is_wsrep_xid(info->list + i) &&
x <= wsrep_limit), false) ||
@@ -2431,7 +2672,7 @@ static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
return FALSE;
}
-int ha_recover(HASH *commit_list)
+int ha_recover(HASH *commit_list, MEM_ROOT *arg_mem_root)
{
struct xarecover_st info;
DBUG_ENTER("ha_recover");
@@ -2439,6 +2680,8 @@ int ha_recover(HASH *commit_list)
info.commit_list= commit_list;
info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
info.list= NULL;
+ info.mem_root= arg_mem_root;
+ info.error= false;
/* commit_list and tc_heuristic_recover cannot be set both */
DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
@@ -2483,6 +2726,9 @@ int ha_recover(HASH *commit_list)
info.found_my_xids, opt_tc_log_file);
DBUG_RETURN(1);
}
+ if (info.error)
+ DBUG_RETURN(1);
+
if (info.commit_list)
sql_print_information("Crash table recovery finished.");
DBUG_RETURN(0);
diff --git a/sql/handler.h b/sql/handler.h
index a7c455ae7c9..d2bb514f5cf 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -44,6 +44,7 @@
#include <mysql/psi/mysql_table.h>
#include "sql_sequence.h"
#include "mem_root_array.h"
+#include <utility> // pair
class Alter_info;
class Virtual_column_info;
@@ -931,6 +932,32 @@ struct xid_t {
};
typedef struct xid_t XID;
+/*
+  Enumerates binlog files as a sequence in the order of
+  their creation, that is in the top-down order of the index file.
+  Ranges from zero through MAX_binlog_id.
+  Do not confuse the value with the binlog file numerical suffix,
+  nor with the binlog file line in the binlog index file.
+*/
+typedef uint Binlog_file_id;
+const Binlog_file_id MAX_binlog_id= UINT_MAX;
+/*
+  Compound of the binlog-id and the byte offset of a transaction's first
+  event in a sequence (e.g. the recovery sequence) of binlog files.
+  Binlog_offset(0,0) is the minimum value, meaning
+  the first byte of the first binlog file.
+*/
+typedef std::pair<Binlog_file_id, my_off_t> Binlog_offset;
+
+/* binlog-based recovery transaction descriptor */
+struct xid_recovery_member
+{
+ my_xid xid;
+ uint in_engine_prepare; // number of engines that have xid prepared
+ bool decided_to_commit;
+ Binlog_offset binlog_coord; // semisync recovery binlog offset
+};
+
/* for recover() handlerton call */
#define MIN_XID_LIST_SIZE 128
#define MAX_XID_LIST_SIZE (1024*128)
@@ -5320,7 +5347,8 @@ int ha_commit_one_phase(THD *thd, bool all);
int ha_commit_trans(THD *thd, bool all);
int ha_rollback_trans(THD *thd, bool all);
int ha_prepare(THD *thd);
-int ha_recover(HASH *commit_list);
+int ha_recover(HASH *commit_list, MEM_ROOT *mem_root= NULL);
+uint ha_recover_complete(HASH *commit_list, Binlog_offset *coord= NULL);
/* transactions: these functions never call handlerton functions directly */
int ha_enable_transaction(THD *thd, bool on);
@@ -5448,4 +5476,8 @@ int del_global_index_stat(THD *thd, TABLE* table, KEY* key_info);
int del_global_table_stat(THD *thd, const LEX_CSTRING *db, const LEX_CSTRING *table);
uint ha_count_rw_all(THD *thd, Ha_trx_info **ptr_ha_info);
bool non_existing_table_error(int error);
+uint ha_count_rw_2pc(THD *thd, bool all);
+uint ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
+ bool all);
+
#endif /* HANDLER_INCLUDED */
diff --git a/sql/log.cc b/sql/log.cc
index f1fef44e05f..047d644e5da 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -63,6 +63,12 @@
#include "wsrep_trans_observer.h"
#endif /* WITH_WSREP */
+#ifdef HAVE_REPLICATION
+#include "semisync_master.h"
+#include "semisync_slave.h"
+#include <utility> // pair
+#endif
+
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
#define MAX_TIME_SIZE 32
@@ -88,16 +94,12 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
THD *thd);
-static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
-static int binlog_xa_recover_dummy(handlerton *hton, XID *xid_list, uint len);
-static int binlog_commit_by_xid(handlerton *hton, XID *xid);
-static int binlog_rollback_by_xid(handlerton *hton, XID *xid);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
Log_event *end_ev, bool all, bool using_stmt,
- bool using_trx);
+ bool using_trx, bool is_ro_1pc);
static const LEX_CSTRING write_error_msg=
{ STRING_WITH_LEN("error writing to the binary log") };
@@ -1693,17 +1695,13 @@ int binlog_init(void *p)
binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
binlog_hton->savepoint_rollback_can_release_mdl=
binlog_savepoint_rollback_can_release_mdl;
- binlog_hton->commit= binlog_commit;
+ binlog_hton->commit= [](handlerton *, THD *thd, bool all) { return 0; };
binlog_hton->rollback= binlog_rollback;
binlog_hton->drop_table= [](handlerton *, const char*) { return -1; };
if (WSREP_ON || opt_bin_log)
{
binlog_hton->prepare= binlog_prepare;
binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
- binlog_hton->commit_by_xid= binlog_commit_by_xid;
- binlog_hton->rollback_by_xid= binlog_rollback_by_xid;
- // recover needs to be set to make xa{commit,rollback}_handlerton effective
- binlog_hton->recover= binlog_xa_recover_dummy;
}
binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN | HTON_NO_ROLLBACK;
return 0;
@@ -1770,7 +1768,7 @@ static int binlog_close_connection(handlerton *hton, THD *thd)
static int
binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
Log_event *end_ev, bool all, bool using_stmt,
- bool using_trx)
+ bool using_trx, bool is_ro_1pc= false)
{
int error= 0;
DBUG_ENTER("binlog_flush_cache");
@@ -1797,7 +1795,8 @@ binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
*/
error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
end_ev, all,
- using_stmt, using_trx);
+ using_stmt, using_trx,
+ is_ro_1pc);
}
else
{
@@ -1871,7 +1870,8 @@ inline size_t serialize_with_xid(XID *xid, char *buf,
nonzero if an error pops up when flushing the cache.
*/
static inline int
-binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
+binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr,
+ bool ro_1pc)
{
DBUG_ENTER("binlog_commit_flush_trx_cache");
@@ -1892,7 +1892,7 @@ binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
}
Query_log_event end_evt(thd, buf, buflen, TRUE, TRUE, TRUE, 0);
- DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
+ DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE, ro_1pc));
}
@@ -2011,35 +2011,30 @@ inline bool is_preparing_xa(THD *thd)
static int binlog_prepare(handlerton *hton, THD *thd, bool all)
{
/* Do nothing unless the transaction is a user XA. */
- return is_preparing_xa(thd) ? binlog_commit(NULL, thd, all) : 0;
-}
-
-
-static int binlog_xa_recover_dummy(handlerton *hton __attribute__((unused)),
- XID *xid_list __attribute__((unused)),
- uint len __attribute__((unused)))
-{
- /* Does nothing. */
- return 0;
+ return is_preparing_xa(thd) ? binlog_commit(thd, all, FALSE) : 0;
}
-static int binlog_commit_by_xid(handlerton *hton, XID *xid)
+int binlog_commit_by_xid(handlerton *hton, XID *xid)
{
THD *thd= current_thd;
+ if (thd->is_current_stmt_binlog_disabled())
+ return 0;
(void) thd->binlog_setup_trx_data();
DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT);
- return binlog_commit(hton, thd, TRUE);
+ return binlog_commit(thd, TRUE, FALSE);
}
-static int binlog_rollback_by_xid(handlerton *hton, XID *xid)
+int binlog_rollback_by_xid(handlerton *hton, XID *xid)
{
THD *thd= current_thd;
+ if (thd->is_current_stmt_binlog_disabled())
+ return 0;
(void) thd->binlog_setup_trx_data();
DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_ROLLBACK ||
@@ -2123,20 +2118,17 @@ static int binlog_commit_flush_xa_prepare(THD *thd, bool all,
return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
}
-
/**
This function is called once after each statement.
It has the responsibility to flush the caches to the binary log on commits.
- @param hton The binlog handlerton.
@param thd The client thread that executes the transaction.
@param all This is @c true if this is a real transaction commit, and
@false otherwise.
-
- @see handlerton::commit
+ @param ro_1pc read-only one-phase commit transaction
*/
-static int binlog_commit(handlerton *hton, THD *thd, bool all)
+int binlog_commit(THD *thd, bool all, bool ro_1pc)
{
int error= 0;
PSI_stage_info org_stage;
@@ -2168,7 +2160,6 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
YESNO(thd->transaction->all.modified_non_trans_table),
YESNO(thd->transaction->stmt.modified_non_trans_table)));
-
thd->backup_stage(&org_stage);
THD_STAGE_INFO(thd, stage_binlog_write);
if (!cache_mngr->stmt_cache.empty())
@@ -2197,8 +2188,9 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
{
error= is_preparing_xa(thd) ?
binlog_commit_flush_xa_prepare(thd, all, cache_mngr) :
- binlog_commit_flush_trx_cache (thd, all, cache_mngr);
+ binlog_commit_flush_trx_cache (thd, all, cache_mngr, ro_1pc);
}
+
/*
This is part of the stmt rollback.
*/
@@ -6228,7 +6220,8 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
bool
MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
- bool is_transactional, uint64 commit_id)
+ bool is_transactional, uint64 commit_id,
+ bool has_xid, bool is_ro_1pc)
{
rpl_gtid gtid;
uint32 domain_id;
@@ -6281,7 +6274,7 @@ MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone,
LOG_EVENT_SUPPRESS_USE_F, is_transactional,
- commit_id);
+ commit_id, has_xid, is_ro_1pc);
/* Write the event to the binary log. */
DBUG_ASSERT(this == &mysql_bin_log);
@@ -7610,7 +7603,8 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
binlog_cache_mngr *cache_mngr,
Log_event *end_ev, bool all,
bool using_stmt_cache,
- bool using_trx_cache)
+ bool using_trx_cache,
+ bool is_ro_1pc)
{
group_commit_entry entry;
Ha_trx_info *ha_info;
@@ -7639,6 +7633,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
entry.using_trx_cache= using_trx_cache;
entry.need_unlog= is_preparing_xa(thd);
ha_info= all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list;
+ entry.ro_1pc= is_ro_1pc;
for (; !entry.need_unlog && ha_info; ha_info= ha_info->next())
{
@@ -8513,10 +8508,13 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
uint64 commit_id)
{
binlog_cache_mngr *mngr= entry->cache_mngr;
+ bool has_xid= entry->end_event->get_type_code() == XID_EVENT;
+
DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");
if (write_gtid_event(entry->thd, is_prepared_xa(entry->thd),
- entry->using_trx_cache, commit_id))
+ entry->using_trx_cache, commit_id,
+ has_xid, entry->ro_1pc))
DBUG_RETURN(ER_ERROR_ON_WRITE);
if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
@@ -9247,6 +9245,11 @@ TC_LOG::run_commit_ordered(THD *thd, bool all)
if (!ht->commit_ordered)
continue;
ht->commit_ordered(ht, thd, all);
+ DBUG_EXECUTE_IF("enable_log_write_upto_crash",
+ {
+ DBUG_SET_INITIAL("+d,crash_after_log_write_upto");
+ sleep(1000);
+ });
DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
}
}
@@ -9979,6 +9982,151 @@ int TC_LOG::using_heuristic_recover()
/****** transaction coordinator log for 2pc - binlog() based solution ******/
#define TC_LOG_BINLOG MYSQL_BIN_LOG
+/**
+  Truncates the binlog file @c file_name to @c pos and removes the binlog
+  files that are listed after it in the binlog index file.
+
+  The files following @c file_name are first registered in the purge index
+  file; then the binlog index file is trimmed at the index offset recorded
+  for the first of them, the registered files are purged, and finally
+  @c file_name itself is shrunk to @c pos and synced.
+  When built without HAVE_REPLICATION the function does nothing and
+  reports success.
+
+  @param  file_name   Name of the binlog file to truncate
+  @param  pos         Offset within @c file_name the file is truncated to;
+                      asserted to be non-zero
+  @param  ptr_gtid    GTID of the first removed transaction; used only in
+                      the informational message logged on success
+
+  @retval false       ok
+  @retval true        error
+*/
+bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *file_name,
+                                                my_off_t pos,
+                                                rpl_gtid *ptr_gtid)
+{
+  int error= 0;
+#ifdef HAVE_REPLICATION
+  LOG_INFO log_info;
+  THD *thd= current_thd;
+  my_off_t index_file_offset= 0;
+  File file= -1;
+  MY_STAT s;
+  my_off_t old_size;
+
+  if ((error= find_log_pos(&log_info, file_name, 1)))
+  {
+    sql_print_error("Failed to locate binary log file:%s."
+                    "Error:%d", file_name, error);
+    goto end;
+  }
+
+  /* Register every binlog that follows file_name for purging. */
+  while (!(error= find_next_log(&log_info, 1)))
+  {
+    if (!index_file_offset)
+    {
+      /* Remember where the index file is to be cut. */
+      index_file_offset= log_info.index_file_start_offset;
+      if ((error= open_purge_index_file(TRUE)))
+      {
+        sql_print_error("Failed to open purge index "
+                        "file:%s. Error:%d", purge_index_file_name, error);
+        goto end;
+      }
+    }
+    if ((error= register_purge_index_entry(log_info.log_file_name)))
+    {
+      sql_print_error("Failed to copy %s to purge index"
+                      " file. Error:%d", log_info.log_file_name, error);
+      goto end;
+    }
+  }
+
+  /* The loop above is expected to stop only at the end of the index. */
+  if (error != LOG_INFO_EOF)
+  {
+    sql_print_error("Failed to find the next binlog to "
+                    "add to purge index register. Error:%d", error);
+    goto end;
+  }
+
+  if (is_inited_purge_index_file())
+  {
+    if (!index_file_offset)
+      index_file_offset= log_info.index_file_start_offset;
+
+    if ((error= sync_purge_index_file()))
+    {
+      sql_print_error("Failed to flush purge index "
+                      "file. Error:%d", error);
+      goto end;
+    }
+
+    // Trim index file
+    error= mysql_file_chsize(index_file.file, index_file_offset, '\n',
+                             MYF(MY_WME));
+    if (!error)
+      error= mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE));
+    if (error)
+    {
+      sql_print_error("Failed to truncate binlog index "
+                      "file:%s to offset:%llu. Error:%d", index_file_name,
+                      index_file_offset, error);
+      goto end;
+    }
+
+    /* Reset data in old index cache */
+    if ((error= reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1)))
+    {
+      sql_print_error("Failed to reinit binlog index "
+                      "file. Error:%d", error);
+      goto end;
+    }
+
+    /* Read each entry from purge_index_file and delete the file. */
+    if ((error= purge_index_entry(thd, NULL, TRUE)))
+    {
+      sql_print_error("Failed to process registered "
+                      "files that would be purged.");
+      goto end;
+    }
+  }
+
+  DBUG_ASSERT(pos);
+
+  if ((file= mysql_file_open(key_file_binlog, file_name,
+                             O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
+  {
+    error= 1;
+    sql_print_error("Failed to open binlog file:%s for "
+                    "truncation.", file_name);
+    goto end;
+  }
+  /*
+    The old size is only reported in the success message below. When
+    my_stat() fails `s` is not filled in, so report zero rather than
+    reading an uninitialized value.
+  */
+  old_size= my_stat(file_name, &s, MYF(0)) ? s.st_size : 0;
+  clear_inuse_flag_when_closing(file);
+  /* Change binlog file size to truncate_pos */
+  error= mysql_file_chsize(file, pos, 0, MYF(MY_WME));
+  if (!error)
+    error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE));
+  if (error)
+  {
+    sql_print_error("Failed to truncate the "
+                    "binlog file:%s to size:%llu. Error:%d",
+                    file_name, pos, error);
+    goto end;
+  }
+  else
+  {
+    char buf[21];
+    longlong10_to_str(ptr_gtid->seq_no, buf, 10);
+    sql_print_information("Successfully truncated binlog file:%s "
+                          "from previous file size %llu "
+                          "to pos:%llu to remove transactions starting from "
+                          "GTID %u-%u-%s",
+                          file_name, old_size, pos,
+                          ptr_gtid->domain_id, ptr_gtid->server_id, buf);
+  }
+
+end:
+  if (file >= 0)
+    mysql_file_close(file, MYF(MY_WME));
+
+  /* Any earlier failure, of either sign, is normalized to 1 here. */
+  error= error || close_purge_index_file();
+#endif
+  return error > 0;
+}
int TC_LOG_BINLOG::open(const char *opt_name)
{
int error= 1;
@@ -10454,7 +10602,520 @@ start_binlog_background_thread()
return 0;
}
+#ifdef HAVE_REPLICATION
+class Recovery_context
+{
+public:
+ my_off_t prev_event_pos; // log_pos of the previously read event
+ rpl_gtid last_gtid;
+ bool last_gtid_standalone;
+ bool last_gtid_valid;
+ bool last_gtid_no2pc; // true when the group does not end with Xid event
+ uint last_gtid_engines; // engine count derived from the Gtid event
+ Binlog_offset last_gtid_coord; // <binlog id, binlog offset>
+ /*
+ When true, it's the semisync slave recovery mode: it
+ rolls back transactions in doubt and wipes them off from binlog.
+ The rest of declarations deal with this type of recovery.
+ */
+ bool do_truncate;
+ /*
+ transaction-in-doubt's gtid:s. `truncate_gtid` is the ultimate value,
+ if it's non-zero truncation is taking place to start from it.
+ Its value gets refined throughout binlog scanning conducted with at most
+ 2 rounds.
+ When an estimate is done in the 1st round of 2-round recovery its value
+ gets memorized for possible adoption as the ultimate `truncate_gtid`.
+ */
+ rpl_gtid truncate_gtid, truncate_gtid_1st_round;
+ /*
+ the last non-transactional group that is located in binlog
+ behind truncate_gtid.
+ */
+ rpl_gtid binlog_unsafe_gtid;
+ char binlog_truncate_file_name[FN_REFLEN] ;
+ char binlog_unsafe_file_name[FN_REFLEN] ;
+ /*
+ When do_truncate is true, the truncate position may not be
+ found in one round when recovered transactions are multi-engine
+ or just on different engines.
+ In the single recoverable engine case `truncate_reset_done` and
+ therefore `truncate_validated` remains `false` when the last
+ binlog is the binlog-checkpoint one.
+ The meaning of `truncate_reset_done` is according to the following example:
+ Let round = 1, Binlog contains the sequence of replication event groups:
+ [g1, G2, g3]
+ where `G` (in capital) stands for committed, `g` for prepared.
+ g1 is first set as truncation candidate, then G2 reset it to indicate
+ the actual truncation is behind (to the right of) it.
+ `truncate_validated` is set to true when `binlog_truncate_coord` (as of `g3`)
+ won't change.
+ Observe last_gtid_valid is affected, so in the above example `g1` that
+ was initially ignored for the gtid binlog state now seeing `G2`
+ would have to be added to it. See gtid_maybe_to_truncate.
+ */
+ bool truncate_validated; // trued when the truncate position settled
+ bool truncate_reset_done; // trued when the position is to reevaluate
+ /* Flags the fact of truncate position estimation is done the 1st round */
+ bool truncate_set_in_1st;
+ /*
+ Monotonically indexes binlog files in the recovery list.
+ When the list is "likely" singleton the value is UINT_MAX.
+ Otherwise enumeration starts with zero for the first file, increments
+ by one for any next file except for the last file in the list, which
+ is also the initial binlog file for recovery,
+ that is enumerated with UINT_MAX.
+ */
+ Binlog_file_id id_binlog;
+ enum_binlog_checksum_alg checksum_alg;
+ Binlog_offset binlog_truncate_coord,
+ binlog_truncate_coord_1st_round; // pair is similar to truncate_gtid
+ Binlog_offset binlog_unsafe_coord;
+ /*
+ Populated at decide_or_assess() with gtids-in-doubt whose
+ binlog offset is greater than or equal to that of the current gtid
+ truncate candidate.
+ Gets emptied by reset_truncate_coord into gtid binlog state.
+ Allocated only when do_truncate is true.
+ */
+ Dynamic_array<rpl_gtid> *gtid_maybe_to_truncate;
+ Recovery_context();
+ ~Recovery_context() { delete gtid_maybe_to_truncate; }
+ /*
+ Completes the recovery procedure.
+ In the normal case prepared xids get committed when they are also found
+ in binlog, otherwise they are rolled back.
+ In the semisync slave case the xids that are located in binlog in
+ a truncated tail get rolled back, otherwise they are committed.
+ Both decisions are contingent on safety to truncate.
+ */
+ bool complete(MYSQL_BIN_LOG *log, HASH &xids);
+
+ /*
+ Decides on commit of xid passed through member argument.
+ In the semisync slave case it assigns binlog coordinate to
+ any xid that remains in-doubt. Decision on them will be
+ done after binlog scan rounds.
+ */
+ bool decide_or_assess(xid_recovery_member *member, int round,
+ Format_description_log_event *fdle,
+ LOG_INFO *linfo, my_off_t pos);
+
+ /*
+ Assigns last_gtid and assesses the maximum (in the binlog offset term)
+ unsafe gtid (group of events).
+ */
+ void process_gtid(int round, Gtid_log_event *gev, LOG_INFO *linfo);
+
+ /*
+ Compute next action at the end of processing of the current binlog file.
+ It may increment the round.
+ When the round turns in the semisync-slave recovery
+ id_binlog, truncate_validated, truncate_reset_done
+ get reset/set for the next round.
+ Within the 2nd round id_binlog keeps incrementing.
+
+ Passed arguments:
+ round the current round that *may* be incremented here
+ last_log_name the recovery starting binlog file
+ binlog_checkpoint_name
+ binlog checkpoint file
+ linfo binlog file list struct for next file
+ log pointer to mysql_bin_log instance
+
+ Returns: 0 when rounds continue, maybe the current one remains
+ 1 when all rounds are done
+ */
+ int next_binlog_or_round(int& round,
+ const char *last_log_name,
+ const char *binlog_checkpoint_name,
+ LOG_INFO *linfo, MYSQL_BIN_LOG *log);
+ /*
+ Relates to the semisync recovery.
+ Returns true when truncated tail does not contain non-transactional
+ group of events.
+ Otherwise returns false.
+ */
+ bool is_safe_to_truncate()
+ {
+ return !do_truncate ? true :
+ (truncate_gtid.seq_no == 0 || // no truncate
+ binlog_unsafe_coord < binlog_truncate_coord); // or unsafe is earlier
+ }
+
+ /*
+ Relates to the semisync recovery.
+ Is invoked when a standalone or non-2pc group is detected.
+ Both are unsafe to truncate in the semisync-slave recovery so
+ the maximum unsafe coordinate may be updated.
+ In the non-2pc group case though, *exceptionally*,
+ the no-engine group is considered safe, to be invalidated
+ to not contribute to binlog state.
+ */
+ void update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo);
+
+ /*
+ Relates to the semisync recovery.
+ Is called when a committed or decided to-commit transaction is detected.
+ Actions:
+ truncate_gtid then is set to "nil" as indicated by rpl_gtid::seq_no := 0.
+ truncate_reset_done takes a note of that fact.
+ binlog_truncate_coord gets reset to the current gtid offset merely to
+ "suggest" any potential future truncate gtid must have a greater offset.
+ gtid_maybe_to_truncate gets emptied into gtid binlog state.
+
+ Returns:
+ false on success, otherwise
+ true when OOM at rpl_global_gtid_binlog_state insert
+ */
+ bool reset_truncate_coord(my_off_t pos);
+
+ /*
+ Sets binlog_truncate_coord to the value of the current transaction's gtid.
+ In multi-engine case that might be just an assessment to be refined
+ in the current round and confirmed in a next one.
+ gtid_maybe_to_truncate receives the current gtid as a new element.
+ Returns
+ false on success, otherwise
+ true when OOM at gtid_maybe_to_truncate append
+
+ */
+ bool set_truncate_coord(LOG_INFO *linfo, int round,
+ enum_binlog_checksum_alg fd_checksum_alg);
+};
+
+/*
+ Finishes recovery once the binlog scan is over: resolves the collected
+ xids through ha_recover_complete() and, in the semisync slave mode with
+ a non-nil truncate_gtid, truncates the binlog.
+
+ Returns false on success (including the early return below), true when
+ truncation failed or was refused as unsafe.
+*/
+bool Recovery_context::complete(MYSQL_BIN_LOG *log, HASH &xids)
+{
+ if (!do_truncate || is_safe_to_truncate())
+ {
+ /*
+ No coordinate is passed in the normal recovery. In the semisync slave
+ mode it is the truncate coordinate, or last_gtid_coord when there is
+ nothing to truncate.
+ */
+ uint count_in_prepare=
+ ha_recover_complete(&xids,
+ !do_truncate ? NULL :
+ (truncate_gtid.seq_no > 0 ?
+ &binlog_truncate_coord : &last_gtid_coord));
+
+ /*
+ NOTE(review): the early return (which skips truncation) is taken only
+ when log_warnings > 2, so control flow depends on log verbosity --
+ confirm this is intended.
+ */
+ if (count_in_prepare > 0 && global_system_variables.log_warnings > 2)
+ {
+ sql_print_warning("Could not complete %u number of transactions.",
+ count_in_prepare);
+ return false; // there's later dry run ha_recover() to error out
+ }
+ }
+
+ /* Truncation is not done when there's no transaction to roll back */
+ if (do_truncate && truncate_gtid.seq_no > 0)
+ {
+ if (is_safe_to_truncate())
+ {
+ if (log->truncate_and_remove_binlogs(binlog_truncate_file_name,
+ binlog_truncate_coord.second,
+ &truncate_gtid))
+ {
+ sql_print_error("Failed to truncate the binary log to "
+ "file:%s pos:%llu.", binlog_truncate_file_name,
+ binlog_truncate_coord.second);
+ return true;
+ }
+ }
+ else
+ {
+ /* An unsafe group sits in the would-be truncated tail: refuse. */
+ sql_print_error("Cannot truncate the binary log to file:%s "
+ "pos:%llu as unsafe statement "
+ "is found at file:%s pos:%llu which is "
+ "beyond the truncation position;"
+ "all transactions in doubt are left intact. ",
+ binlog_truncate_file_name, binlog_truncate_coord.second,
+ binlog_unsafe_file_name, binlog_unsafe_coord.second);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+  Puts the context into its initial state: no gtid seen yet, no truncate
+  candidate, the binlog id at MAX_binlog_id.
+  The semisync slave recovery mode (do_truncate) is taken from
+  rpl_semi_sync_slave_enabled; only in that mode gtid_maybe_to_truncate
+  is allocated.
+*/
+Recovery_context::Recovery_context() :
+  prev_event_pos(0),
+  last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false),
+  last_gtid_engines(0),
+  do_truncate(rpl_semi_sync_slave_enabled),
+  truncate_validated(false), truncate_reset_done(false),
+  truncate_set_in_1st(false), id_binlog(MAX_binlog_id),
+  checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL)
+{
+  last_gtid_coord= Binlog_offset(0,0);
+  binlog_truncate_coord= binlog_truncate_coord_1st_round= Binlog_offset(0,0);
+  binlog_unsafe_coord= Binlog_offset(0,0);
+  binlog_truncate_file_name[0]= 0;
+  binlog_unsafe_file_name  [0]= 0;
+  /*
+    last_gtid is zeroed as well: decide_or_assess() prints it in an error
+    message that can be reached before any Gtid event has been processed.
+  */
+  last_gtid= binlog_unsafe_gtid= truncate_gtid= truncate_gtid_1st_round=
+    rpl_gtid();
+  if (do_truncate)
+    gtid_maybe_to_truncate= new Dynamic_array<rpl_gtid>(16, 16);
+}
+
+/*
+  Drops the current truncate candidate: truncate_gtid becomes nil,
+  binlog_truncate_coord is moved to <id_binlog, pos> and the gtids
+  collected in gtid_maybe_to_truncate are flushed into the gtid binlog
+  state.
+
+  Returns false on success, true when a binlog state update fails.
+*/
+bool Recovery_context::reset_truncate_coord(my_off_t pos)
+{
+  DBUG_ASSERT(binlog_truncate_coord.second == 0 ||
+              last_gtid_coord >= binlog_truncate_coord ||
+              truncate_set_in_1st);
+
+  /* Back up the 1st round estimate once; next_binlog_or_round may restore it. */
+  const bool backup_1st_round= truncate_set_in_1st &&
+                               truncate_gtid_1st_round.seq_no == 0;
+  if (backup_1st_round)
+  {
+    binlog_truncate_coord_1st_round= binlog_truncate_coord;
+    truncate_gtid_1st_round= truncate_gtid;
+  }
+
+  truncate_reset_done= true;
+  truncate_gtid= rpl_gtid();
+  binlog_truncate_coord= Binlog_offset(id_binlog, pos);
+
+  /* Every gtid held back as a truncate suspect is now known to stay. */
+  uint idx= 0;
+  while (idx < gtid_maybe_to_truncate->elements())
+  {
+    rpl_gtid pending= gtid_maybe_to_truncate->at(idx);
+    if (rpl_global_gtid_binlog_state.update_nolock(&pending, false))
+      return true;
+    idx++;
+  }
+  gtid_maybe_to_truncate->clear();
+
+  return false;
+}
+
+/*
+  Makes the current gtid (last_gtid at last_gtid_coord in the file named
+  by linfo) the truncate candidate and records it in
+  gtid_maybe_to_truncate.
+
+  Returns the result of the append: false on success, true on failure.
+*/
+bool Recovery_context::set_truncate_coord(LOG_INFO *linfo, int round,
+                                          enum_binlog_checksum_alg fd_checksum)
+{
+  truncate_set_in_1st= round == 1;
+  checksum_alg= fd_checksum;
+
+  truncate_gtid= last_gtid;
+  binlog_truncate_coord= last_gtid_coord;
+  strmake_buf(binlog_truncate_file_name, linfo->log_file_name);
+
+  const bool append_failed= gtid_maybe_to_truncate->append(last_gtid);
+  return append_failed;
+}
+
+/*
+  Handles one Xid event of the scanned binlog.
+
+  member  descriptor of the xid when it is still prepared in some engine,
+          NULL otherwise
+  round   the current scan round (1 or 2)
+  fdle    format description event in effect; supplies the checksum alg
+  linfo   the binlog file being scanned
+  pos     log_pos of the Xid event
+
+  In the normal recovery a prepared xid is decided to commit.
+  In the semisync slave recovery (do_truncate) a fully prepared xid only
+  gets its binlog coordinate assigned and contributes to the truncate
+  position estimate; a committed or partially committed one resets that
+  estimate.
+
+  Returns false on success, true on error (engine count mismatch or
+  a failed allocation/state update).
+*/
+bool Recovery_context::decide_or_assess(xid_recovery_member *member, int round,
+                                        Format_description_log_event *fdle,
+                                        LOG_INFO *linfo, my_off_t pos)
+{
+  if (member)
+  {
+    /*
+      xid in doubt are resolved as follows:
+      in_engine_prepare is compared against binlogged info to
+      yield the commit-or-rollback decision in the normal case.
+      In the semisync-slave recovery the decision is done later
+      after the binlog scanning has determined the truncation offset.
+    */
+    if (member->in_engine_prepare > last_gtid_engines)
+    {
+      char buf[21];
+      longlong10_to_str(last_gtid.seq_no, buf, 10);
+      sql_print_error("Error to recovery multi-engine transaction: "
+                      "the number of engines prepared %u exceeds the "
+                      "respective number %u in its GTID %u-%u-%s "
+                      "located at file:%s pos:%llu",
+                      member->in_engine_prepare, last_gtid_engines,
+                      last_gtid.domain_id, last_gtid.server_id, buf,
+                      linfo->log_file_name, last_gtid_coord.second);
+      return true;
+    }
+    else if (member->in_engine_prepare < last_gtid_engines)
+    {
+      DBUG_ASSERT(member->in_engine_prepare > 0);
+      /*
+        This is an "unlikely" branch of two or more engines in transaction
+        that is partially committed, so to complete.
+      */
+      member->decided_to_commit= true;
+      if (do_truncate)
+      {
+        /* Validated truncate at this point can be only in the 2nd round. */
+        DBUG_ASSERT(!truncate_validated ||
+                    (round == 2 && truncate_set_in_1st &&
+                     last_gtid_coord < binlog_truncate_coord));
+        /*
+          Estimated truncate must not be greater than the current one's
+          offset, unless the turn of the rounds.
+        */
+        DBUG_ASSERT(truncate_validated ||
+                    (last_gtid_coord >= binlog_truncate_coord ||
+                     (round == 2 && truncate_set_in_1st)));
+
+        if (!truncate_validated && reset_truncate_coord(pos))
+          return true;
+      }
+    }
+    else // member->in_engine_prepare == last_gtid_engines
+    {
+      if (!do_truncate) // "normal" recovery
+      {
+        member->decided_to_commit= true;
+      }
+      else
+      {
+        member->binlog_coord= last_gtid_coord;
+        last_gtid_valid= false;
+        /*
+          First time truncate position estimate before its validation.
+          An estimate may change to involve reset_truncate_coord call.
+        */
+        if (!truncate_validated)
+        {
+          if (truncate_gtid.seq_no == 0 /* was reset or never set */ ||
+              (truncate_set_in_1st && round == 2 /* reevaluated at round turn */))
+          {
+            if (set_truncate_coord(linfo, round, fdle->checksum_alg))
+              return true;
+          }
+          else
+          {
+            /* Truncate estimate was done ago, this gtid can't improve it. */
+            DBUG_ASSERT(last_gtid_coord >= binlog_truncate_coord);
+
+            /*
+              A failed append must not pass silently: the gtid would be
+              missing when the list is flushed to the binlog state.
+              It is reported the same way as in set_truncate_coord().
+            */
+            if (gtid_maybe_to_truncate->append(last_gtid))
+              return true;
+          }
+
+          DBUG_ASSERT(member->decided_to_commit == false); // may be redecided
+        }
+        else
+        {
+          /*
+            binlog truncate was determined, possibly to none, otherwise
+            its offset greater than that of the current gtid.
+          */
+          DBUG_ASSERT(truncate_gtid.seq_no == 0 ||
+                      last_gtid_coord < binlog_truncate_coord);
+          member->decided_to_commit= true;
+        }
+      }
+    }
+  }
+  else if (do_truncate) // "0" < last_gtid_engines
+  {
+    /*
+      Similar to the partial commit branch above.
+    */
+    DBUG_ASSERT(!truncate_validated || last_gtid_coord < binlog_truncate_coord);
+    DBUG_ASSERT(truncate_validated ||
+                (last_gtid_coord >= binlog_truncate_coord ||
+                 (round == 2 && truncate_set_in_1st)));
+
+    if (!truncate_validated && reset_truncate_coord(pos))
+      return true;
+  }
+
+  return false;
+}
+
+/*
+  Semisync slave recovery only. When the current group lies behind
+  a non-nil truncate candidate it is either remembered as the unsafe
+  group (its gtid, coordinate and file name), or, when it involves no
+  engine, merely excluded from the gtid binlog state.
+*/
+void Recovery_context::update_binlog_unsafe_coord_if_needed(LOG_INFO *linfo)
+{
+  if (!do_truncate)
+    return;
+
+  /* g1,U2 is of interest, *not* G1,U2 */
+  if (!(truncate_gtid.seq_no > 0) ||
+      !(last_gtid_coord > binlog_truncate_coord))
+    return;
+
+  DBUG_ASSERT(binlog_truncate_coord.second > 0);
+  /*
+    Potentially unsafe when the truncate coordinate is not determined,
+    just detected as unsafe when behind the latter.
+  */
+  if (last_gtid_engines != 0)
+  {
+    binlog_unsafe_gtid= last_gtid;
+    binlog_unsafe_coord= last_gtid_coord;
+    strmake_buf(binlog_unsafe_file_name, linfo->log_file_name);
+  }
+  else
+  {
+    last_gtid_valid= false;
+  }
+}
+
+/*
+  Handles one Gtid event of the scanned binlog.
+
+  Records the event's gtid, its engine count and its coordinate (the
+  current binlog id and the position of the previously read event) as
+  the "last gtid". In round 1, or while the semisync truncate position
+  is not validated yet, it also resets the per-group flags, lets
+  a standalone group update the unsafe coordinate and marks the gtid
+  valid for the binlog state.
+*/
+void Recovery_context::process_gtid(int round, Gtid_log_event *gev,
+                                    LOG_INFO *linfo)
+{
+  last_gtid.domain_id= gev->domain_id;
+  last_gtid.server_id= gev->server_id;
+  last_gtid.seq_no= gev->seq_no;
+  /* extra_engines of UCHAR_MAX maps to zero engines. */
+  last_gtid_engines= gev->extra_engines != UCHAR_MAX ?
+    gev->extra_engines + 1 : 0;
+  last_gtid_coord= Binlog_offset(id_binlog, prev_event_pos);
+  DBUG_ASSERT(!last_gtid_valid);
+  /* Spelled as a plain comparison; `!x == 0` reads as `!(x == 0)`. */
+  DBUG_ASSERT(last_gtid.seq_no != 0);
+
+  if (round == 1 || (do_truncate && !truncate_validated))
+  {
+    last_gtid_no2pc= false;
+    last_gtid_standalone=
+      (gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false;
+    if (do_truncate && last_gtid_standalone)
+      update_binlog_unsafe_coord_if_needed(linfo);
+    /* Update the binlog state with any 'valid' GTID logged after Gtid_list. */
+    last_gtid_valid= true; // may flip at Xid when falls to truncate
+  }
+}
+
+/*
+ Decides what follows the binlog file that has just been scanned.
+
+ Returns 1 when that file is last_log_name, i.e. all rounds are done;
+ 0 otherwise, having either turned round 1 into round 2 or, in the
+ semisync slave recovery, advanced id_binlog within round 2.
+*/
+int Recovery_context::next_binlog_or_round(int& round,
+ const char *last_log_name,
+ const char *binlog_checkpoint_name,
+ LOG_INFO *linfo,
+ MYSQL_BIN_LOG *log)
+{
+ if (!strcmp(linfo->log_file_name, last_log_name))
+ {
+ /* Exit the loop now at the end of the current round. */
+ DBUG_ASSERT(round <= 2);
+
+ if (do_truncate)
+ {
+ truncate_validated= truncate_reset_done;
+ truncate_reset_done= false;
+ /*
+ Restore the 1st round saved estimate if it was not refined in the 2nd.
+ That can only occur in multiple log files context when the initial file
+ has a truncation candidate (a `g`) and does not have any committed `G`,
+ *and* other files (binlog-checkpoint one and so on) do not have any
+ transaction-in-doubt.
+ */
+ if (truncate_gtid.seq_no == 0 && truncate_set_in_1st)
+ {
+ DBUG_ASSERT(truncate_gtid_1st_round.seq_no > 0);
+
+ truncate_gtid= truncate_gtid_1st_round;
+ binlog_truncate_coord= binlog_truncate_coord_1st_round;
+ }
+ }
+ return 1;
+ }
+ else if (round == 1)
+ {
+ if (do_truncate)
+ {
+ truncate_validated= truncate_reset_done;
+ /*
+ Without a validated position the gtid state gathered so far is
+ dropped, along with the truncate suspects.
+ */
+ if (!truncate_validated)
+ {
+ rpl_global_gtid_binlog_state.reset_nolock();
+ gtid_maybe_to_truncate->clear();
+ }
+ truncate_reset_done= false;
+ id_binlog= 0; // enumeration of the files starts over in round 2
+ }
+ round++;
+ }
+ else if (do_truncate) // binlog looping within round 2
+ {
+ id_binlog++;
+
+ DBUG_ASSERT(id_binlog <= MAX_binlog_id); // the assert is "practical"
+ }
+
+ DBUG_ASSERT(!do_truncate || id_binlog != MAX_binlog_id ||
+ !strcmp(linfo->log_file_name, binlog_checkpoint_name));
+
+ return 0;
+}
+#endif
/*
Execute recovery of the binary log
@@ -10476,16 +11137,25 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
MEM_ROOT mem_root;
char binlog_checkpoint_name[FN_REFLEN];
bool binlog_checkpoint_found;
- bool first_round;
IO_CACHE log;
File file= -1;
const char *errmsg;
#ifdef HAVE_REPLICATION
- rpl_gtid last_gtid;
- bool last_gtid_standalone= false;
- bool last_gtid_valid= false;
+ Recovery_context ctx;
#endif
DBUG_ENTER("TC_LOG_BINLOG::recover");
+ /*
+ The for-loop variable is updated by the following rule set:
+ Initially set to 1.
+ After the initial binlog file is processed to identify
+ the Binlog-checkpoint file it is incremented when the latter file
+ is different from the initial one. Otherwise the only log has been
+ fully parsed so the for loop exits.
+ The 2nd round parses, in the binlog index order, all the earlier files
+ starting from the Binlog-checkpoint file. It ends when the initial
+ binlog file is reached.
+ */
+ int round;
if (! fdle->is_valid() ||
(my_hash_init(key_memory_binlog_recover_exec, &xids,
@@ -10501,6 +11171,10 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
+ /* finds xids when root is not NULL */
+ if (do_xa && ha_recover(&xids, &mem_root))
+ goto err1;
+
/*
Scan the binlog for XIDs that need to be committed if still in the
prepared stage.
@@ -10510,10 +11184,9 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
*/
binlog_checkpoint_found= false;
- first_round= true;
- for (;;)
+ for (round= 1;;)
{
- while ((ev= Log_event::read_log_event(first_round ? first_log : &log,
+ while ((ev= Log_event::read_log_event(round == 1 ? first_log : &log,
fdle, opt_master_verify_checksum))
&& ev->is_valid())
{
@@ -10521,17 +11194,23 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
switch (typ)
{
case XID_EVENT:
+ if (do_xa)
{
- if (do_xa)
+ xid_recovery_member *member=
+ (xid_recovery_member*)
+ my_hash_search(&xids, (uchar*) &static_cast<Xid_log_event*>(ev)->xid,
+ sizeof(my_xid));
+#ifndef HAVE_REPLICATION
{
- Xid_log_event *xev=(Xid_log_event *)ev;
- uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
- sizeof(xev->xid));
- if (!x || my_hash_insert(&xids, x))
- goto err2;
+ if (member)
+ member->decided_to_commit= true;
}
- break;
+#else
+ if (ctx.decide_or_assess(member, round, fdle, linfo, ev->log_pos))
+ goto err2;
+#endif
}
+ break;
case QUERY_EVENT:
{
Query_log_event *query_ev= (Query_log_event*) ev;
@@ -10545,10 +11224,18 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
if (!x || my_hash_insert(&ddl_log_ids, x))
goto err2;
}
+#ifdef HAVE_REPLICATION
+ if (((Query_log_event *)ev)->is_commit() ||
+ ((Query_log_event *)ev)->is_rollback())
+ {
+ ctx.last_gtid_no2pc= true;
+ ctx.update_binlog_unsafe_coord_if_needed(linfo);
+ }
+#endif
break;
}
case BINLOG_CHECKPOINT_EVENT:
- if (first_round && do_xa)
+ if (round == 1 && do_xa)
{
size_t dir_len;
Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
@@ -10568,8 +11255,9 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
}
}
break;
+#ifdef HAVE_REPLICATION
case GTID_LIST_EVENT:
- if (first_round)
+ if (round == 1 || (ctx.do_truncate && ctx.id_binlog == 0))
{
Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;
@@ -10579,20 +11267,13 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
}
break;
-#ifdef HAVE_REPLICATION
case GTID_EVENT:
- if (first_round)
- {
- Gtid_log_event *gev= (Gtid_log_event *)ev;
-
- /* Update the binlog state with any GTID logged after Gtid_list. */
- last_gtid.domain_id= gev->domain_id;
- last_gtid.server_id= gev->server_id;
- last_gtid.seq_no= gev->seq_no;
- last_gtid_standalone=
- ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
- last_gtid_valid= true;
- }
+ ctx.process_gtid(round, (Gtid_log_event *)ev, linfo);
+ break;
+
+ case XA_PREPARE_LOG_EVENT:
+ ctx.last_gtid_no2pc= true; // TODO: complete MDEV-21469 that removes this block
+ ctx.update_binlog_unsafe_coord_if_needed(linfo);
break;
#endif
@@ -10606,27 +11287,31 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
default:
/* Nothing. */
break;
- }
+ } // end of switch
#ifdef HAVE_REPLICATION
- if (last_gtid_valid &&
- ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
- (!last_gtid_standalone &&
- (typ == XID_EVENT ||
- typ == XA_PREPARE_LOG_EVENT ||
- (LOG_EVENT_IS_QUERY(typ) &&
- (((Query_log_event *)ev)->is_commit() ||
- ((Query_log_event *)ev)->is_rollback()))))))
+ if (ctx.last_gtid_valid &&
+ ((ctx.last_gtid_standalone && !ev->is_part_of_group(typ)) ||
+ (!ctx.last_gtid_standalone &&
+ (typ == XID_EVENT || ctx.last_gtid_no2pc))))
{
- if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
+ DBUG_ASSERT(round == 1 || (ctx.do_truncate && !ctx.truncate_validated));
+ DBUG_ASSERT(!ctx.last_gtid_no2pc ||
+ (ctx.last_gtid_standalone ||
+ typ == XA_PREPARE_LOG_EVENT ||
+ (LOG_EVENT_IS_QUERY(typ) &&
+ (((Query_log_event *)ev)->is_commit() ||
+ ((Query_log_event *)ev)->is_rollback()))));
+
+ if (rpl_global_gtid_binlog_state.update_nolock(&ctx.last_gtid, false))
goto err2;
- last_gtid_valid= false;
+ ctx.last_gtid_valid= false;
}
+ ctx.prev_event_pos= ev->log_pos;
#endif
-
delete ev;
ev= NULL;
- }
+ } // end of while
/*
If the last binlog checkpoint event points to an older log, we have to
@@ -10636,11 +11321,10 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
written by an older version of MariaDB (or MySQL) - these always have an
(implicit) binlog checkpoint event at the start of the last binlog file.
*/
- if (first_round)
+ if (round == 1)
{
if (!binlog_checkpoint_found)
break;
- first_round= false;
DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
@@ -10658,33 +11342,50 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
end_io_cache(&log);
mysql_file_close(file, MYF(MY_WME));
file= -1;
+ /*
+ NOTE: reading other binlog's FD is necessary for finding out
+ the checksum status of the respective binlog file.
+ */
+ if (find_next_log(linfo, 1))
+ {
+ sql_print_error("Error reading binlog files during recovery. "
+ "Aborting.");
+ goto err2;
+ }
}
+#ifdef HAVE_REPLICATION
+ int rc= ctx.next_binlog_or_round(round, last_log_name,
+ binlog_checkpoint_name, linfo, this);
+ if (rc == -1)
+ goto err2;
+ else if (rc == 1)
+ break; // all rounds done
+#else
if (!strcmp(linfo->log_file_name, last_log_name))
break; // No more files to do
+ round++;
+#endif
+
if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0)
{
sql_print_error("%s", errmsg);
goto err2;
}
- /*
- We do not need to read the Format_description_log_event of other binlog
- files. It is not possible for a binlog checkpoint to span multiple
- binlog files written by different versions of the server. So we can use
- the first one read for reading from all binlog files.
- */
- if (find_next_log(linfo, 1))
- {
- sql_print_error("Error reading binlog files during recovery. Aborting.");
- goto err2;
- }
fdle->reset_crypto();
- }
+ } // end of for
if (do_xa)
{
- if (ha_recover(&xids))
- goto err2;
+ if (binlog_checkpoint_found)
+ {
+#ifndef HAVE_REPLICATION
+ if (ha_recover_complete(&xids))
+#else
+ if (ctx.complete(this, xids))
+#endif
+ goto err2;
+ }
}
if (ddl_log_close_binlogged_events(&ddl_log_ids))
goto err2;
@@ -10713,6 +11414,7 @@ err1:
}
+
int
MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
{
diff --git a/sql/log.h b/sql/log.h
index eaf7cde1c07..d61e4660330 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -477,6 +477,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
/* Flag used to optimise around wait_for_prior_commit. */
bool queued_by_other;
ulong binlog_id;
+ bool ro_1pc; // passes the binlog_cache_mngr::ro_1pc value to Gtid ctor
};
/*
@@ -810,7 +811,8 @@ public:
my_bool *with_annotate= 0); // binary log write
bool write_transaction_to_binlog(THD *thd, binlog_cache_mngr *cache_mngr,
Log_event *end_ev, bool all,
- bool using_stmt_cache, bool using_trx_cache);
+ bool using_stmt_cache, bool using_trx_cache,
+ bool is_ro_1pc);
bool write_incident_already_locked(THD *thd);
bool write_incident(THD *thd);
@@ -860,6 +862,9 @@ public:
int purge_first_log(Relay_log_info* rli, bool included);
int set_purge_index_file_name(const char *base_file_name);
int open_purge_index_file(bool destroy);
+ bool truncate_and_remove_binlogs(const char *truncate_file,
+ my_off_t truncate_pos,
+ rpl_gtid *gtid);
bool is_inited_purge_index_file();
int close_purge_index_file();
int clean_purge_index_file();
@@ -896,7 +901,8 @@ public:
void set_status_variables(THD *thd);
bool is_xidlist_idle();
bool write_gtid_event(THD *thd, bool standalone, bool is_transactional,
- uint64 commit_id);
+ uint64 commit_id,
+ bool has_xid= false, bool ro_1pc= false);
int read_state_from_file();
int write_state_to_file();
int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size);
@@ -1237,4 +1243,8 @@ class Gtid_list_log_event;
const char *
get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list);
+int binlog_commit(THD *thd, bool all, bool is_ro_1pc);
+int binlog_commit_by_xid(handlerton *hton, XID *xid);
+int binlog_rollback_by_xid(handlerton *hton, XID *xid);
+
#endif /* LOG_H */
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 9c7c56b1c34..c77059ee8f5 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -53,6 +53,7 @@
#include "sql_digest.h"
#include "zlib.h"
#include "myisampack.h"
+#include <algorithm>
#define my_b_write_string(A, B) my_b_write((A), (uchar*)(B), (uint) (sizeof(B) - 1))
@@ -2560,10 +2561,12 @@ Binlog_checkpoint_log_event::Binlog_checkpoint_log_event(
Gtid_log_event::Gtid_log_event(const uchar *buf, uint event_len,
const Format_description_log_event
*description_event)
- : Log_event(buf, description_event), seq_no(0), commit_id(0)
+ : Log_event(buf, description_event), seq_no(0), commit_id(0),
+ flags_extra(0), extra_engines(0)
{
uint8 header_size= description_event->common_header_len;
uint8 post_header_len= description_event->post_header_len[GTID_EVENT-1];
+ const uchar *buf_0= buf;
if (event_len < (uint) header_size + (uint) post_header_len ||
post_header_len < GTID_HEADER_LEN)
return;
@@ -2597,6 +2600,32 @@ Gtid_log_event::Gtid_log_event(const uchar *buf, uint event_len,
memcpy(xid.data, buf, data_length);
buf+= data_length;
}
+
+ /* the extra flags check and actions */
+ if (static_cast<uint>(buf - buf_0) < event_len)
+ {
+ flags_extra= *buf++;
+ /*
+ extra engines flags presence is identified by non-zero byte value
+ at this point
+ */
+ if (flags_extra & FL_EXTRA_MULTI_ENGINE)
+ {
+ DBUG_ASSERT(static_cast<uint>(buf - buf_0) < event_len);
+
+ extra_engines= *buf++;
+
+ DBUG_ASSERT(extra_engines > 0);
+ }
+ }
+ /*
+ the strict '<' part of the assert corresponds to extra zero-padded
+ trailing bytes,
+ */
+ DBUG_ASSERT(static_cast<uint>(buf - buf_0) <= event_len);
+ /* and the last of them is tested. */
+ DBUG_ASSERT(static_cast<uint>(buf - buf_0) == event_len ||
+ buf_0[event_len - 1] == 0);
}
diff --git a/sql/log_event.h b/sql/log_event.h
index 990d95e1dc0..3adc7a26d93 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -490,6 +490,16 @@ class String;
#define LOG_EVENT_IGNORABLE_F 0x80
/**
+ @def LOG_EVENT_ACCEPT_OWN_F
+
+ Flag set by the semisync slave for accepting
+ the same server_id ("own") events which the slave must not have
+ in its state. Typically such events were never committed by
+ their originator (this server) and discarded at its semisync-slave recovery.
+*/
+#define LOG_EVENT_ACCEPT_OWN_F 0x4000
+
+/**
@def LOG_EVENT_SKIP_REPLICATION_F
Flag set by application creating the event (with @@skip_replication); the
@@ -3602,6 +3612,13 @@ public:
event_mysql_xid_t xid;
#endif
uchar flags2;
+ uint flags_extra; // more flags area placed after the regular flags2's one
+ /*
+ Number of engine participants in transaction minus 1.
+ When zero the event does not contain that information.
+ */
+ uint8 extra_engines;
+
/* Flags2. */
/* FL_STANDALONE is set when there is no terminating COMMIT event. */
@@ -3633,9 +3650,19 @@ public:
/* FL_"COMMITTED or ROLLED-BACK"_XA is set for XA transaction. */
static const uchar FL_COMPLETED_XA= 128;
+ /* Flags_extra. */
+
+ /*
+ FL_EXTRA_MULTI_ENGINE is set for event group comprising a transaction
+ involving multiple storage engines. No flag and extra data are added
+ to the event when the transaction involves only one engine.
+ */
+ static const uchar FL_EXTRA_MULTI_ENGINE= 1;
+
#ifdef MYSQL_SERVER
Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone,
- uint16 flags, bool is_transactional, uint64 commit_id);
+ uint16 flags, bool is_transactional, uint64 commit_id,
+ bool has_xid= false, bool is_ro_1pc= false);
#ifdef HAVE_REPLICATION
void pack_info(Protocol *protocol);
virtual int do_apply_event(rpl_group_info *rgi);
diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc
index e216724ca7a..d97e87fc4e9 100644
--- a/sql/log_event_server.cc
+++ b/sql/log_event_server.cc
@@ -644,7 +644,7 @@ Log_event::do_shall_skip(rpl_group_info *rgi)
rli->replicate_same_server_id,
rli->slave_skip_counter));
if ((server_id == global_system_variables.server_id &&
- !rli->replicate_same_server_id) ||
+ !(rli->replicate_same_server_id || (flags & LOG_EVENT_ACCEPT_OWN_F))) ||
(rli->slave_skip_counter == 1 && rli->is_in_group()) ||
(flags & LOG_EVENT_SKIP_REPLICATION_F &&
opt_replicate_events_marked_for_skip != RPL_SKIP_REPLICATE))
@@ -3261,10 +3261,13 @@ bool Binlog_checkpoint_log_event::write()
Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg,
uint32 domain_id_arg, bool standalone,
uint16 flags_arg, bool is_transactional,
- uint64 commit_id_arg)
+ uint64 commit_id_arg, bool has_xid,
+ bool ro_1pc)
: Log_event(thd_arg, flags_arg, is_transactional),
seq_no(seq_no_arg), commit_id(commit_id_arg), domain_id(domain_id_arg),
- flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? FL_GROUP_COMMIT_ID : 0))
+ flags2((standalone ? FL_STANDALONE : 0) |
+ (commit_id_arg ? FL_GROUP_COMMIT_ID : 0)),
+ flags_extra(0), extra_engines(0)
{
cache_type= Log_event::EVENT_NO_CACHE;
bool is_tmp_table= thd_arg->lex->stmt_accessed_temp_table();
@@ -3287,15 +3290,40 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg,
flags2|= (thd_arg->rgi_slave->gtid_ev_flags2 & (FL_DDL|FL_WAITED));
XID_STATE &xid_state= thd->transaction->xid_state;
- if (is_transactional && xid_state.is_explicit_XA() &&
- (thd->lex->sql_command == SQLCOM_XA_PREPARE ||
- xid_state.get_state_code() == XA_PREPARED))
+ if (is_transactional)
{
- DBUG_ASSERT(thd->lex->xa_opt != XA_ONE_PHASE);
+ if (xid_state.is_explicit_XA() &&
+ (thd->lex->sql_command == SQLCOM_XA_PREPARE ||
+ xid_state.get_state_code() == XA_PREPARED))
+ {
+ DBUG_ASSERT(thd->lex->xa_opt != XA_ONE_PHASE);
+
+ flags2|= thd->lex->sql_command == SQLCOM_XA_PREPARE ?
+ FL_PREPARED_XA : FL_COMPLETED_XA;
+ xid.set(xid_state.get_xid());
+ }
+ /* count non-zero extra recoverable engines; total = extra + 1 */
+ if (has_xid)
+ {
+ DBUG_ASSERT(ha_count_rw_2pc(thd_arg,
+ thd_arg->in_multi_stmt_transaction_mode()));
+
+ extra_engines=
+ ha_count_rw_2pc(thd_arg, thd_arg->in_multi_stmt_transaction_mode()) - 1;
+ }
+ else if (ro_1pc)
+ {
+ extra_engines= UCHAR_MAX;
+ }
+ else if (thd->lex->sql_command == SQLCOM_XA_PREPARE)
+ {
+ DBUG_ASSERT(thd_arg->in_multi_stmt_transaction_mode());
- flags2|= thd->lex->sql_command == SQLCOM_XA_PREPARE ?
- FL_PREPARED_XA : FL_COMPLETED_XA;
- xid.set(xid_state.get_xid());
+ uint8 count= ha_count_rw_2pc(thd_arg, true);
+ extra_engines= count > 1 ? 0 : UCHAR_MAX;
+ }
+ if (extra_engines > 0)
+ flags_extra|= FL_EXTRA_MULTI_ENGINE;
}
}
@@ -3339,19 +3367,19 @@ Gtid_log_event::peek(const uchar *event_start, size_t event_len,
bool
Gtid_log_event::write()
{
- uchar buf[GTID_HEADER_LEN+2+sizeof(XID)];
- size_t write_len;
+ uchar buf[GTID_HEADER_LEN+2+sizeof(XID) + /* flags_extra: */ 1+4];
+ size_t write_len= 13;
int8store(buf, seq_no);
int4store(buf+8, domain_id);
buf[12]= flags2;
if (flags2 & FL_GROUP_COMMIT_ID)
{
- int8store(buf+13, commit_id);
+ DBUG_ASSERT(write_len + 8 == GTID_HEADER_LEN + 2);
+
+ int8store(buf+write_len, commit_id);
write_len= GTID_HEADER_LEN + 2;
}
- else
- write_len= 13;
if (flags2 & (FL_PREPARED_XA | FL_COMPLETED_XA))
{
@@ -3363,6 +3391,16 @@ Gtid_log_event::write()
memcpy(buf+write_len, xid.data, data_length);
write_len+= data_length;
}
+ if (flags_extra > 0)
+ {
+ buf[write_len]= flags_extra;
+ write_len++;
+ }
+ if (flags_extra & FL_EXTRA_MULTI_ENGINE)
+ {
+ buf[write_len]= extra_engines;
+ write_len++;
+ }
if (write_len < GTID_HEADER_LEN)
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 17c2d59c9d7..e7aa0d4a510 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6174,6 +6174,15 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
bool is_malloc = false;
bool is_rows_event= false;
/*
+ The flag has replicate_same_server_id semantics and is raised to accept
+ a same-server-id event on the semisync slave, for both the gtid and legacy
+ connection modes.
+ Such events can appear as a result of this server's crash recovery: the event
+ was created here and replicated elsewhere right before the crash, and at
+ recovery it could have been evicted from this server's binlog.
+ */
+ bool do_accept_own_server_id= false;
+ /*
FD_q must have been prepared for the first R_a event
inside get_master_version_and_clock()
Show-up of FD:s affects checksum_alg at once because
@@ -6234,6 +6243,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
unlock_data_lock= FALSE;
goto err;
}
+ DBUG_ASSERT((uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_ACCEPT_OWN_F) == 0);
if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
@@ -6933,7 +6943,8 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
}
else
if ((s_id == global_system_variables.server_id &&
- !mi->rli.replicate_same_server_id) ||
+ !(mi->rli.replicate_same_server_id ||
+ (do_accept_own_server_id= rpl_semi_sync_slave_enabled))) ||
event_that_should_be_ignored(buf) ||
/*
the following conjunction deals with IGNORE_SERVER_IDS, if set
@@ -6993,6 +7004,19 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
}
else
{
+ if (do_accept_own_server_id)
+ {
+ int2store(const_cast<uchar*>(buf + FLAGS_OFFSET),
+ uint2korr(buf + FLAGS_OFFSET) | LOG_EVENT_ACCEPT_OWN_F);
+ if (checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
+ {
+ ha_checksum crc= 0;
+
+ crc= my_checksum(crc, (const uchar *) buf,
+ event_len - BINLOG_CHECKSUM_LEN);
+ int4store(&buf[event_len - BINLOG_CHECKSUM_LEN], crc);
+ }
+ }
if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len)))
{
mi->master_log_pos+= inc_pos;
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 125abe334fc..5e64007ef84 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -831,6 +831,7 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key,
flush_lock.release(flush_lsn);
log_flush_notify(flush_lsn);
+ DBUG_EXECUTE_IF("crash_after_log_write_upto", DBUG_SUICIDE(););
}
/** write to the log file up to the last log entry.