summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/extra/rpl_tests/rpl_insert_delayed.test2
-rw-r--r--mysql-test/r/mysqlbinlog.result1
-rw-r--r--mysql-test/r/mysqlbinlog2.result12
-rw-r--r--mysql-test/suite/binlog/r/binlog_checkpoint.result88
-rw-r--r--mysql-test/suite/binlog/r/binlog_mdev342.result1
-rw-r--r--mysql-test/suite/binlog/r/binlog_row_binlog.result1
-rw-r--r--mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_options.result4
-rw-r--r--mysql-test/suite/binlog/r/binlog_stm_binlog.result1
-rw-r--r--mysql-test/suite/binlog/r/binlog_xa_recover.result275
-rw-r--r--mysql-test/suite/binlog/t/binlog_checkpoint.test108
-rw-r--r--mysql-test/suite/binlog/t/binlog_xa_recover-master.opt2
-rw-r--r--mysql-test/suite/binlog/t/binlog_xa_recover.test245
-rw-r--r--mysql-test/suite/innodb/r/binlog_consistent.result6
-rw-r--r--mysql-test/suite/innodb/r/group_commit_binlog_pos.result1
-rw-r--r--mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result1
-rw-r--r--mysql-test/suite/innodb/t/group_commit_binlog_pos.test13
-rw-r--r--mysql-test/suite/innodb/t/group_commit_binlog_pos_no_optimize_thread.test13
-rw-r--r--mysql-test/suite/rpl/r/rpl_checksum.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_insert_delayed,stmt.rdiff2
-rw-r--r--mysql-test/suite/rpl/r/rpl_mariadb_slave_capability.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_log.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_log_innodb.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_show_relaylog_events.result11
-rw-r--r--mysql-test/suite/rpl/r/rpl_stm_log.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_stm_mix_show_relaylog_events.result11
-rw-r--r--mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test2
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result10
-rw-r--r--sql/handler.cc49
-rw-r--r--sql/handler.h42
-rw-r--r--sql/log.cc602
-rw-r--r--sql/log.h91
-rw-r--r--sql/log_event.cc2
-rw-r--r--sql/log_event.h2
-rw-r--r--sql/mysqld.cc6
-rw-r--r--sql/mysqld.h2
-rw-r--r--sql/rpl_rli.cc2
-rw-r--r--sql/slave.cc13
-rw-r--r--sql/sql_class.h3
-rw-r--r--storage/innobase/handler/ha_innodb.cc30
-rw-r--r--storage/innobase/trx/trx0trx.c11
-rw-r--r--storage/xtradb/handler/ha_innodb.cc30
-rw-r--r--storage/xtradb/include/trx0trx.h1
-rw-r--r--storage/xtradb/trx/trx0trx.c11
43 files changed, 1286 insertions, 431 deletions
diff --git a/mysql-test/extra/rpl_tests/rpl_insert_delayed.test b/mysql-test/extra/rpl_tests/rpl_insert_delayed.test
index df08622b0bd..bb34f4be207 100644
--- a/mysql-test/extra/rpl_tests/rpl_insert_delayed.test
+++ b/mysql-test/extra/rpl_tests/rpl_insert_delayed.test
@@ -133,7 +133,7 @@ if (`SELECT @@global.binlog_format = 'STATEMENT'`)
{
#must show two INSERT DELAYED
--let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1)
- --let $binlog_limit= 1,6
+ --let $binlog_limit= 2,6
--source include/show_binlog_events.inc
}
select * from t1;
diff --git a/mysql-test/r/mysqlbinlog.result b/mysql-test/r/mysqlbinlog.result
index 33904dfd9bd..255b0679244 100644
--- a/mysql-test/r/mysqlbinlog.result
+++ b/mysql-test/r/mysqlbinlog.result
@@ -892,6 +892,7 @@ DROP DATABASE test1;
FLUSH LOGS;
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # CREATE DATABASE test1
master-bin.000002 # Query # # use `test1`; CREATE TABLE t1(id int)
master-bin.000002 # Query # # DROP DATABASE test1
diff --git a/mysql-test/r/mysqlbinlog2.result b/mysql-test/r/mysqlbinlog2.result
index 806cf74479e..bf65bab602d 100644
--- a/mysql-test/r/mysqlbinlog2.result
+++ b/mysql-test/r/mysqlbinlog2.result
@@ -697,7 +697,6 @@ SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
BEGIN
/*!*/;
-SET INSERT_ID=6/*!*/;
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;
@@ -1483,17 +1482,6 @@ COMMIT
/*!*/;
DELIMITER ;
DELIMITER /*!*/;
-SET TIMESTAMP=1579609943/*!*/;
-SET @@session.pseudo_thread_id=999999999/*!*/;
-SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=0, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
-SET @@session.sql_mode=0/*!*/;
-SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
-/*!\C latin1 *//*!*/;
-SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
-SET @@session.lc_time_names=0/*!*/;
-SET @@session.collation_database=DEFAULT/*!*/;
-BEGIN
-/*!*/;
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;
diff --git a/mysql-test/suite/binlog/r/binlog_checkpoint.result b/mysql-test/suite/binlog/r/binlog_checkpoint.result
new file mode 100644
index 00000000000..7532e33367e
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_checkpoint.result
@@ -0,0 +1,88 @@
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+SET @old_innodb_flush_log_at_trx_commit= @@global.innodb_flush_log_at_trx_commit;
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
+*** Test that RESET MASTER waits for pending commit checkpoints to complete.
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
+INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+INSERT INTO t2 VALUES (1, REPEAT("x", 4100));
+INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+master-bin.000004 #
+show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000001
+SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
+RESET MASTER;
+This will timeout, as RESET MASTER is blocked
+SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
+Warnings:
+Warning 1639 debug sync point wait timed out
+SET DEBUG_SYNC= "now SIGNAL con1_go";
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+show binlog events in 'master-bin.000001' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000001 # Binlog_checkpoint # # master-bin.000001
+*** Test that binlog N is active, and commit checkpoint for (N-1) is
+*** done while there is still a pending commit checkpoint for (N-2).
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_continue";
+INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR con2_continue";
+INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+show binlog events in 'master-bin.000001' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000001 # Binlog_checkpoint # # master-bin.000001
+master-bin.000001 # Query # # BEGIN
+master-bin.000001 # Table_map # # table_id: # (test.t1)
+master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000001 # Xid # # COMMIT /* XID */
+master-bin.000001 # Rotate # # master-bin.000002;pos=<binlog_start>
+show binlog events in 'master-bin.000002' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000002 # Binlog_checkpoint # # master-bin.000001
+master-bin.000002 # Query # # BEGIN
+master-bin.000002 # Table_map # # table_id: # (test.t1)
+master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000002 # Xid # # COMMIT /* XID */
+master-bin.000002 # Rotate # # master-bin.000003;pos=<binlog_start>
+show binlog events in 'master-bin.000003' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000003 # Binlog_checkpoint # # master-bin.000001
+SET DEBUG_SYNC= "now SIGNAL con2_continue";
+con1 is still pending, no new binlog checkpoint should have been logged.
+show binlog events in 'master-bin.000003' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000003 # Binlog_checkpoint # # master-bin.000001
+SET DEBUG_SYNC= "now SIGNAL con1_continue";
+No commit checkpoints are pending, a new binlog checkpoint should have been logged.
+show binlog events in 'master-bin.000003' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000003 # Binlog_checkpoint # # master-bin.000001
+master-bin.000003 # Binlog_checkpoint # # master-bin.000003
+DROP TABLE t1, t2;
+SET GLOBAL max_binlog_size= @old_max_binlog_size;
+SET GLOBAL innodb_flush_log_at_trx_commit= @old_innodb_flush_log_at_trx_commit;
diff --git a/mysql-test/suite/binlog/r/binlog_mdev342.result b/mysql-test/suite/binlog/r/binlog_mdev342.result
index 0e1d8f8ac78..6ec6dcd783b 100644
--- a/mysql-test/suite/binlog/r/binlog_mdev342.result
+++ b/mysql-test/suite/binlog/r/binlog_mdev342.result
@@ -21,6 +21,7 @@ master-bin.000002 #
show binlog events in 'master-bin.000001' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000001 # Binlog_checkpoint # # master-bin.000001
master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
master-bin.000001 # Query # # BEGIN
master-bin.000001 # Table_map # # table_id: # (test.t1)
diff --git a/mysql-test/suite/binlog/r/binlog_row_binlog.result b/mysql-test/suite/binlog/r/binlog_row_binlog.result
index 26710178cd8..99ab1ac9ec2 100644
--- a/mysql-test/suite/binlog/r/binlog_row_binlog.result
+++ b/mysql-test/suite/binlog/r/binlog_row_binlog.result
@@ -234,6 +234,7 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
set @ac = @@autocommit;
set autocommit= 0;
diff --git a/mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_options.result b/mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_options.result
index ae732ffcc08..55f4154574b 100644
--- a/mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_options.result
+++ b/mysql-test/suite/binlog/r/binlog_row_mysqlbinlog_options.result
@@ -34,8 +34,8 @@ DELIMITER /*!*/;
# at #
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
-#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
+#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
use `new_test1`/*!*/;
#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
@@ -230,8 +230,8 @@ DELIMITER /*!*/;
# at #
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
-#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
+#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
use `new_test1`/*!*/;
#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
diff --git a/mysql-test/suite/binlog/r/binlog_stm_binlog.result b/mysql-test/suite/binlog/r/binlog_stm_binlog.result
index f9d9fa1d18d..d676f5184ac 100644
--- a/mysql-test/suite/binlog/r/binlog_stm_binlog.result
+++ b/mysql-test/suite/binlog/r/binlog_stm_binlog.result
@@ -145,6 +145,7 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
set @ac = @@autocommit;
set autocommit= 0;
diff --git a/mysql-test/suite/binlog/r/binlog_xa_recover.result b/mysql-test/suite/binlog/r/binlog_xa_recover.result
index 41df149a928..0ac14fd7f7d 100644
--- a/mysql-test/suite/binlog/r/binlog_xa_recover.result
+++ b/mysql-test/suite/binlog/r/binlog_xa_recover.result
@@ -1,175 +1,198 @@
SET GLOBAL max_binlog_size= 4096;
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
-CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
-SET @@global.debug_dbug='+d,skip_commit_ordered';
-INSERT INTO t1 VALUES (0, REPEAT("x", 4100));
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con1_ready WAIT_FOR _ever";
+INSERT INTO t1 VALUES (100, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (101, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (102, REPEAT("x", 4100));
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con1_wait WAIT_FOR con1_cont";
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR _ever";
INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
-SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
-INSERT INTO t2 VALUES (1, "force binlog rotation");
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con2_ready WAIT_FOR _ever";
+SET DEBUG_SYNC= "now WAIT_FOR con1_wait";
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con2_wait WAIT_FOR con2_cont";
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR _ever";
INSERT INTO t1 VALUES (2, NULL);
-SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con3_ready WAIT_FOR _ever";
+SET DEBUG_SYNC= "now WAIT_FOR con2_wait";
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con3_wait WAIT_FOR con3_cont";
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con3_ready WAIT_FOR _ever";
INSERT INTO t1 VALUES (3, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con3_wait";
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con4_wait WAIT_FOR con4_cont";
+SET SESSION debug_dbug="+d,crash_commit_after_log";
+INSERT INTO t1 VALUES (4, NULL);
+SET DEBUG_SYNC= "now WAIT_FOR con4_wait";
+SET DEBUG_SYNC= "now SIGNAL con1_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+SET DEBUG_SYNC= "now SIGNAL con2_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
+SET DEBUG_SYNC= "now SIGNAL con3_cont";
SET DEBUG_SYNC= "now WAIT_FOR con3_ready";
-INSERT INTO t2 VALUES (2, "force binlog rotation");
-FLUSH TABLES t2;
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
master-bin.000004 #
-show binlog events in 'master-bin.000001' from <binlog_start>;
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.000001 # Binlog_checkpoint # # master-bin.000001
-master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
-master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.t1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Rotate # # master-bin.000002;pos=<binlog_start>
-show binlog events in 'master-bin.000002' from <binlog_start>;
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.000002 # Binlog_checkpoint # # master-bin.000002
-master-bin.000002 # Query # # BEGIN
-master-bin.000002 # Table_map # # table_id: # (test.t1)
-master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000002 # Xid # # COMMIT /* XID */
-master-bin.000002 # Query # # BEGIN
-master-bin.000002 # Table_map # # table_id: # (test.t2)
-master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000002 # Query # # COMMIT
-master-bin.000002 # Rotate # # master-bin.000003;pos=<binlog_start>
+master-bin.000005 #
+master-bin.000006 #
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000002
+master-bin.000003 # Binlog_checkpoint # # master-bin.000003
master-bin.000003 # Query # # BEGIN
master-bin.000003 # Table_map # # table_id: # (test.t1)
master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000003 # Xid # # COMMIT /* XID */
-master-bin.000003 # Query # # BEGIN
-master-bin.000003 # Table_map # # table_id: # (test.t1)
-master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000003 # Xid # # COMMIT /* XID */
-master-bin.000003 # Query # # BEGIN
-master-bin.000003 # Table_map # # table_id: # (test.t2)
-master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000003 # Query # # COMMIT
master-bin.000003 # Rotate # # master-bin.00000<binlog_start>;pos=<binlog_start>
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000002
-master-bin.00000<binlog_start> # Query # # use `test`; FLUSH TABLES t2
-We should see only one entry here, a=0:
-SELECT a FROM t1 ORDER BY a;
-a
-0
-PURGE BINARY LOGS TO "master-bin.000004";
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000003
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.00000<binlog_start>
+master-bin.00000<binlog_start> # Query # # BEGIN
+master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
+master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
+master-bin.00000<binlog_start> # Rotate # # master-bin.000005;pos=<binlog_start>
+show binlog events in 'master-bin.000005' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000005 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000005 # Binlog_checkpoint # # master-bin.00000<binlog_start>
+master-bin.000005 # Query # # BEGIN
+master-bin.000005 # Table_map # # table_id: # (test.t1)
+master-bin.000005 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000005 # Xid # # COMMIT /* XID */
+master-bin.000005 # Query # # BEGIN
+master-bin.000005 # Table_map # # table_id: # (test.t1)
+master-bin.000005 # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.000005 # Xid # # COMMIT /* XID */
+master-bin.000005 # Rotate # # master-bin.000006;pos=<binlog_start>
+show binlog events in 'master-bin.000006' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.000006 # Binlog_checkpoint # # master-bin.00000<binlog_start>
+PURGE BINARY LOGS TO "master-bin.000006";
show binary logs;
Log_name File_size
-master-bin.000002 #
-master-bin.000003 #
master-bin.000004 #
-SET SESSION debug_dbug="+d,crash_commit_after_log";
-INSERT INTO t1 VALUES (4, NULL);
+master-bin.000005 #
+master-bin.000006 #
+SET DEBUG_SYNC= "now SIGNAL con4_cont";
Got one of the listed errors
SELECT a FROM t1 ORDER BY a;
a
-0
1
2
3
4
-*** Test that RESET MASTER waits for pending XIDs to be unlogged.
-SET @old_max_binlog_size= @@global.max_binlog_size;
+100
+101
+102
+Test that with multiple binlog checkpoints, recovery starts from the last one.
SET GLOBAL max_binlog_size= 4096;
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_go";
-INSERT INTO t1 VALUES (10, NULL);
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con10_ready WAIT_FOR con10_cont";
+INSERT INTO t1 VALUES (10, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
-INSERT INTO t2 VALUES (10, REPEAT("x", 4100));
-INSERT INTO t2 VALUES (11, REPEAT("x", 4100));
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con11_ready WAIT_FOR con11_cont";
+INSERT INTO t1 VALUES (11, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con11_ready";
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con12_ready WAIT_FOR con12_cont";
+INSERT INTO t1 VALUES (12, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con12_ready";
+INSERT INTO t1 VALUES (13, NULL);
show binary logs;
Log_name File_size
+master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
master-bin.000004 #
-master-bin.000005 #
-master-bin.000006 #
-master-bin.000007 #
-SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
+show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000001
+master-bin.00000<binlog_start> # Query # # BEGIN
+master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
+master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
+SET DEBUG_SYNC= "now SIGNAL con10_cont";
+SET DEBUG_SYNC= "now SIGNAL con12_cont";
+SET DEBUG_SYNC= "now SIGNAL con11_cont";
+Checking that master-bin.000004 is the last binlog checkpoint
+show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000001
+master-bin.00000<binlog_start> # Query # # BEGIN
+master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
+master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000002
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.00000<binlog_start>
+Now crash the server
+SET SESSION debug_dbug="+d,crash_commit_after_log";
+INSERT INTO t1 VALUES (14, NULL);
+Got one of the listed errors
+SELECT a FROM t1 ORDER BY a;
+a
+1
+2
+3
+4
+10
+11
+12
+13
+14
+100
+101
+102
+*** Check that recovery works if we crashed early during rotate, before
+*** binlog checkpoint event could be written.
+SET GLOBAL max_binlog_size= 4096;
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
-This will timeout, as RESET MASTER is blocked
-SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
-Warnings:
-Warning 1639 debug sync point wait timed out
-SET DEBUG_SYNC= "now SIGNAL con10_go";
-show binary logs;
-Log_name File_size
-master-bin.000001 #
-*** Test that binlog N is active, and last pending trx in (N-1) is
-unlogged while there is still a pending trx in (N-2).
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_continue";
-INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
-SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
-INSERT INTO t2 VALUES (3, "force binlog rotation");
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con11_ready WAIT_FOR con11_continue";
INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
-SET DEBUG_SYNC= "now WAIT_FOR con11_ready";
-INSERT INTO t2 VALUES (4, "force binlog rotation");
+INSERT INTO t1 VALUES (22, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (23, REPEAT("x", 4100));
+SET SESSION debug_dbug="+d,crash_before_write_checkpoint_event";
+INSERT INTO t1 VALUES (24, REPEAT("x", 4100));
+Got one of the listed errors
+SELECT a FROM t1 ORDER BY a;
+a
+1
+2
+3
+4
+10
+11
+12
+13
+14
+21
+22
+23
+24
+100
+101
+102
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
-show binlog events in 'master-bin.000001' from <binlog_start>;
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.000001 # Binlog_checkpoint # # master-bin.000001
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.t1)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Xid # # COMMIT /* XID */
-master-bin.000001 # Query # # BEGIN
-master-bin.000001 # Table_map # # table_id: # (test.t2)
-master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000001 # Query # # COMMIT
-master-bin.000001 # Rotate # # master-bin.000002;pos=<binlog_start>
-show binlog events in 'master-bin.000002' from <binlog_start>;
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.000002 # Binlog_checkpoint # # master-bin.000001
-master-bin.000002 # Query # # BEGIN
-master-bin.000002 # Table_map # # table_id: # (test.t1)
-master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000002 # Xid # # COMMIT /* XID */
-master-bin.000002 # Query # # BEGIN
-master-bin.000002 # Table_map # # table_id: # (test.t2)
-master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
-master-bin.000002 # Query # # COMMIT
-master-bin.000002 # Rotate # # master-bin.000003;pos=<binlog_start>
-show binlog events in 'master-bin.000003' from <binlog_start>;
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.000003 # Binlog_checkpoint # # master-bin.000001
-SET DEBUG_SYNC= "now SIGNAL con11_continue";
-con10 is still pending, no new binlog checkpoint should have been logged.
-show binlog events in 'master-bin.000003' from <binlog_start>;
-Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.000003 # Binlog_checkpoint # # master-bin.000001
-SET DEBUG_SYNC= "now SIGNAL con10_continue";
-No XIDs are pending, a new binlog checkpoint should have been logged.
-show binlog events in 'master-bin.000003' from <binlog_start>;
+master-bin.000004 #
+master-bin.000005 #
+show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
-master-bin.000003 # Binlog_checkpoint # # master-bin.000001
-master-bin.000003 # Binlog_checkpoint # # master-bin.000003
-DROP TABLE t1, t2;
-SET GLOBAL max_binlog_size= @old_max_binlog_size;
+master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000003
+master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.00000<binlog_start>
+master-bin.00000<binlog_start> # Query # # BEGIN
+master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
+master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
+master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
+master-bin.00000<binlog_start> # Rotate # # master-bin.000005;pos=<binlog_start>
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_checkpoint.test b/mysql-test/suite/binlog/t/binlog_checkpoint.test
new file mode 100644
index 00000000000..557791c77e5
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_checkpoint.test
@@ -0,0 +1,108 @@
+--source include/have_innodb.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+SET @old_innodb_flush_log_at_trx_commit= @@global.innodb_flush_log_at_trx_commit;
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
+
+--echo *** Test that RESET MASTER waits for pending commit checkpoints to complete.
+
+# con1 will hang before doing commit checkpoint, blocking RESET MASTER.
+connect(con1,localhost,root,,);
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
+send INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+# Let's add a few binlog rotations just for good measure.
+INSERT INTO t2 VALUES (1, REPEAT("x", 4100));
+INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
+--source include/show_binary_logs.inc
+--let $binlog_file= master-bin.000004
+--let $binlog_start= 4
+--source include/show_binlog_events.inc
+SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
+send RESET MASTER;
+
+connect(con2,localhost,root,,);
+--echo This will timeout, as RESET MASTER is blocked
+SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
+# Wake up transaction to allow RESET MASTER to complete.
+SET DEBUG_SYNC= "now SIGNAL con1_go";
+
+connection con1;
+reap;
+
+connection default;
+reap;
+--source include/show_binary_logs.inc
+--let $binlog_file= master-bin.000001
+--let $binlog_start= 4
+--source include/show_binlog_events.inc
+
+--echo *** Test that binlog N is active, and commit checkpoint for (N-1) is
+--echo *** done while there is still a pending commit checkpoint for (N-2).
+
+connection con1;
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_continue";
+send INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+
+connection con2;
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR con2_continue";
+send INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
+--source include/show_binary_logs.inc
+--let $binlog_file= master-bin.000001
+--source include/show_binlog_events.inc
+--let $binlog_file= master-bin.000002
+--source include/show_binlog_events.inc
+--let $binlog_file= master-bin.000003
+--source include/show_binlog_events.inc
+
+SET DEBUG_SYNC= "now SIGNAL con2_continue";
+
+connection con2;
+reap;
+
+connection default;
+--echo con1 is still pending, no new binlog checkpoint should have been logged.
+--let $binlog_file= master-bin.000003
+--source include/show_binlog_events.inc
+
+SET DEBUG_SYNC= "now SIGNAL con1_continue";
+
+connection con1;
+reap;
+
+connection default;
+
+--echo No commit checkpoints are pending, a new binlog checkpoint should have been logged.
+--let $binlog_file= master-bin.000003
+
+# Wait for the master-bin.000003 binlog checkpoint to appear.
+--let $wait_for_all= 0
+--let $show_statement= SHOW BINLOG EVENTS IN "$binlog_file"
+--let $field= Info
+--let $condition= = "master-bin.000003"
+--source include/wait_show_condition.inc
+
+--source include/show_binlog_events.inc
+
+
+# Cleanup
+connection default;
+DROP TABLE t1, t2;
+SET GLOBAL max_binlog_size= @old_max_binlog_size;
+SET GLOBAL innodb_flush_log_at_trx_commit= @old_innodb_flush_log_at_trx_commit;
diff --git a/mysql-test/suite/binlog/t/binlog_xa_recover-master.opt b/mysql-test/suite/binlog/t/binlog_xa_recover-master.opt
index 425fda95086..3c44f9fad10 100644
--- a/mysql-test/suite/binlog/t/binlog_xa_recover-master.opt
+++ b/mysql-test/suite/binlog/t/binlog_xa_recover-master.opt
@@ -1 +1 @@
---skip-stack-trace --skip-core-file
+--skip-stack-trace --skip-core-file --loose-debug-dbug=+d,xa_recover_expect_master_bin_000004
diff --git a/mysql-test/suite/binlog/t/binlog_xa_recover.test b/mysql-test/suite/binlog/t/binlog_xa_recover.test
index 7a4cc17112e..36b2ddecb4f 100644
--- a/mysql-test/suite/binlog/t/binlog_xa_recover.test
+++ b/mysql-test/suite/binlog/t/binlog_xa_recover.test
@@ -5,81 +5,105 @@
# Valgrind does not work well with test that crashes the server
--source include/not_valgrind.inc
+# (We do not need to restore these settings, as we crash the server).
SET GLOBAL max_binlog_size= 4096;
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
-CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
-
-# Transactions are not guaranteed stored durably on disk in the engine until
-# they are fsync()ed, which normally happens during commit(). But there is no
-# guarantee that they will _not_ be durable, in particular loosing results
-# of a write(2) system call normally requires a kernel crash (as opposed to
-# just mysqld crash), which is inconvenient to do in a test suite.
-# So instead we do an error insert to prevent commit_ordered() from being
-# called in the engine - so nothing will be written to disk at all, and crash
-# recovery is sure to be needed.
-SET @@global.debug_dbug='+d,skip_commit_ordered';
-
-INSERT INTO t1 VALUES (0, REPEAT("x", 4100));
+# Insert some data to force a few (3) binlog rotations, so we get some
+# normal binlog checkpoints before starting the test.
+INSERT INTO t1 VALUES (100, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (101, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (102, REPEAT("x", 4100));
# Now start a bunch of transactions that span multiple binlog
# files. Leave them in the state prepared-but-not-committed in the engine
# and crash the server. Check that crash recovery is able to recover all
# of them.
+#
+# We use debug_sync to get all the transactions into the prepared state before
+# we commit any of them. This is because the prepare step flushes the InnoDB
+# redo log - including any commits made before, so recovery would become
+# unnecessary, decreasing the value of this test.
+#
+# We arrange to have con1 with a prepared transaction in master-bin.000004,
+# con2 and con3 with a prepared transaction in master-bin.000005, and a new
+# empty master-bin.000006. So the latest binlog checkpoint should be
+# master-bin.000006.
connect(con1,localhost,root,,);
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con1_ready WAIT_FOR _ever";
+# First wait after prepare and before write to binlog.
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con1_wait WAIT_FOR con1_cont";
+# Then complete InnoDB commit in memory (but not commit checkpoint / write to
+# disk), and hang until crash, leaving a transaction to be XA recovered.
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR _ever";
send INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
connection default;
-SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
-INSERT INTO t2 VALUES (1, "force binlog rotation");
+SET DEBUG_SYNC= "now WAIT_FOR con1_wait";
connect(con2,localhost,root,,);
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con2_ready WAIT_FOR _ever";
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con2_wait WAIT_FOR con2_cont";
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR _ever";
send INSERT INTO t1 VALUES (2, NULL);
connection default;
-SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
+SET DEBUG_SYNC= "now WAIT_FOR con2_wait";
connect(con3,localhost,root,,);
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con3_ready WAIT_FOR _ever";
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con3_wait WAIT_FOR con3_cont";
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con3_ready WAIT_FOR _ever";
send INSERT INTO t1 VALUES (3, REPEAT("x", 4100));
+
connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con3_wait";
+
+connect(con4,localhost,root,,);
+SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con4_wait WAIT_FOR con4_cont";
+SET SESSION debug_dbug="+d,crash_commit_after_log";
+send INSERT INTO t1 VALUES (4, NULL);
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con4_wait";
+
+SET DEBUG_SYNC= "now SIGNAL con1_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+SET DEBUG_SYNC= "now SIGNAL con2_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
+SET DEBUG_SYNC= "now SIGNAL con3_cont";
SET DEBUG_SYNC= "now WAIT_FOR con3_ready";
-INSERT INTO t2 VALUES (2, "force binlog rotation");
-# So we won't get warnings about t2 being crashed.
-FLUSH TABLES t2;
# Check that everything is committed in binary log.
--source include/show_binary_logs.inc
---let $binlog_file= master-bin.000001
+--let $binlog_file= master-bin.000003
--let $binlog_start= 4
--source include/show_binlog_events.inc
---let $binlog_file= master-bin.000002
+--let $binlog_file= master-bin.000004
--source include/show_binlog_events.inc
---let $binlog_file= master-bin.000003
+--let $binlog_file= master-bin.000005
--source include/show_binlog_events.inc
---let $binlog_file= master-bin.000004
+--let $binlog_file= master-bin.000006
--source include/show_binlog_events.inc
-# Check that transactions really are not yet committed in engine.
-# (This works because of debug_dbug='+d,skip_commit_ordered').
---echo We should see only one entry here, a=0:
-SELECT a FROM t1 ORDER BY a;
-
# Check that server will not purge too much.
-PURGE BINARY LOGS TO "master-bin.000004";
+PURGE BINARY LOGS TO "master-bin.000006";
--source include/show_binary_logs.inc
# Now crash the server with one more transaction in prepared state.
-system echo wait-binlog_xa_recover.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-SET SESSION debug_dbug="+d,crash_commit_after_log";
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait-binlog_xa_recover.test
+EOF
+SET DEBUG_SYNC= "now SIGNAL con4_cont";
+connection con4;
--error 2006,2013
-INSERT INTO t1 VALUES (4, NULL);
+reap;
-system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+--remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart-group_commit_binlog_pos.test
+EOF
connection default;
--enable_reconnect
@@ -88,87 +112,128 @@ connection default;
# Check that all transactions are recovered.
SELECT a FROM t1 ORDER BY a;
+--echo Test that with multiple binlog checkpoints, recovery starts from the last one.
+SET GLOBAL max_binlog_size= 4096;
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
---echo *** Test that RESET MASTER waits for pending XIDs to be unlogged.
+# Rotate to binlog master-bin.000003 while delaying binlog checkpoints.
+# So we get multiple binlog checkpoints in master-bin.000003.
+# Then complete the checkpoints, crash, and check that we only scan
+# the necessary binlog file (i.e. that we use the _last_ checkpoint).
-SET @old_max_binlog_size= @@global.max_binlog_size;
-SET GLOBAL max_binlog_size= 4096;
-# con10 will hang with a pending XID, blocking RESET MASTER.
connect(con10,localhost,root,,);
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_go";
-send INSERT INTO t1 VALUES (10, NULL);
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con10_ready WAIT_FOR con10_cont";
+send INSERT INTO t1 VALUES (10, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
-# Let's add a few binlog rotations just for good measure.
-INSERT INTO t2 VALUES (10, REPEAT("x", 4100));
-INSERT INTO t2 VALUES (11, REPEAT("x", 4100));
---source include/show_binary_logs.inc
-SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
-send RESET MASTER;
connect(con11,localhost,root,,);
---echo This will timeout, as RESET MASTER is blocked
-SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
-# Wake up transaction to allow RESET MASTER to complete.
-SET DEBUG_SYNC= "now SIGNAL con10_go";
-
-connection con10;
-reap;
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con11_ready WAIT_FOR con11_cont";
+send INSERT INTO t1 VALUES (11, REPEAT("x", 4100));
connection default;
-reap;
---source include/show_binary_logs.inc
-
-
---echo *** Test that binlog N is active, and last pending trx in (N-1) is
---echo unlogged while there is still a pending trx in (N-2).
+SET DEBUG_SYNC= "now WAIT_FOR con11_ready";
-connection con10;
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_continue";
-send INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
+connect(con12,localhost,root,,);
+SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con12_ready WAIT_FOR con12_cont";
+send INSERT INTO t1 VALUES (12, REPEAT("x", 4100));
connection default;
-SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
-INSERT INTO t2 VALUES (3, "force binlog rotation");
+SET DEBUG_SYNC= "now WAIT_FOR con12_ready";
+INSERT INTO t1 VALUES (13, NULL);
-connection con11;
-SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con11_ready WAIT_FOR con11_continue";
-send INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
-
-connection default;
-SET DEBUG_SYNC= "now WAIT_FOR con11_ready";
-INSERT INTO t2 VALUES (4, "force binlog rotation");
--source include/show_binary_logs.inc
---let $binlog_file= master-bin.000001
---source include/show_binlog_events.inc
---let $binlog_file= master-bin.000002
---source include/show_binlog_events.inc
---let $binlog_file= master-bin.000003
+--let $binlog_file= master-bin.000004
+--let $binlog_start= 4
--source include/show_binlog_events.inc
-SET DEBUG_SYNC= "now SIGNAL con11_continue";
-
+SET DEBUG_SYNC= "now SIGNAL con10_cont";
+connection con10;
+reap;
+connection default;
+SET DEBUG_SYNC= "now SIGNAL con12_cont";
+connection con12;
+reap;
+connection default;
+SET DEBUG_SYNC= "now SIGNAL con11_cont";
connection con11;
reap;
connection default;
---echo con10 is still pending, no new binlog checkpoint should have been logged.
---let $binlog_file= master-bin.000003
+# Wait for the last (master-bin.000004) binlog checkpoint to appear.
+--let $wait_for_all= 0
+--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000004"
+--let $field= Info
+--let $condition= = "master-bin.000004"
+--source include/wait_show_condition.inc
+
+--echo Checking that master-bin.000004 is the last binlog checkpoint
--source include/show_binlog_events.inc
-SET DEBUG_SYNC= "now SIGNAL con10_continue";
+--echo Now crash the server
+# It is not too easy to test XA recovery, as it runs early during server
+# startup, before any connections can be made.
+# What we do is set a DBUG error insert which will crash if XA recovery
+# starts from any other binlog than master-bin.000004 (check the file
+# binlog_xa_recover-master.opt). Then we will fail here if XA recovery
+# would start from the wrong place.
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait-binlog_xa_recover.test
+EOF
+SET SESSION debug_dbug="+d,crash_commit_after_log";
+--error 2006,2013
+INSERT INTO t1 VALUES (14, NULL);
-connection con10;
-reap;
+--remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart-group_commit_binlog_pos.test
+EOF
connection default;
---echo No XIDs are pending, a new binlog checkpoint should have been logged.
---let $binlog_file= master-bin.000003
---source include/show_binlog_events.inc
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+# Check that all transactions are recovered.
+SELECT a FROM t1 ORDER BY a;
+
+
+--echo *** Check that recovery works if we crashed early during rotate, before
+--echo *** binlog checkpoint event could be written.
+
+SET GLOBAL max_binlog_size= 4096;
+SET GLOBAL innodb_flush_log_at_trx_commit= 1;
+RESET MASTER;
+
+# We need some initial data to reach binlog master-bin.000004. Otherwise
+# crash recovery fails due to the error insert used for the previous test.
+INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (22, REPEAT("x", 4100));
+INSERT INTO t1 VALUES (23, REPEAT("x", 4100));
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait-binlog_xa_recover.test
+EOF
+SET SESSION debug_dbug="+d,crash_before_write_checkpoint_event";
+--error 2006,2013
+INSERT INTO t1 VALUES (24, REPEAT("x", 4100));
+
+--remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart-group_commit_binlog_pos.test
+EOF
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+# Check that all transactions are recovered.
+SELECT a FROM t1 ORDER BY a;
+
+--source include/show_binary_logs.inc
+--let $binlog_file= master-bin.000004
+--let $binlog_start= 4
+--source include/show_binlog_events.inc
# Cleanup
connection default;
-DROP TABLE t1, t2;
-SET GLOBAL max_binlog_size= @old_max_binlog_size;
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb/r/binlog_consistent.result b/mysql-test/suite/innodb/r/binlog_consistent.result
index 68838e8d52b..f0b665b5ac9 100644
--- a/mysql-test/suite/innodb/r/binlog_consistent.result
+++ b/mysql-test/suite/innodb/r/binlog_consistent.result
@@ -63,15 +63,15 @@ binlog_snapshot_file master-bin.000001
binlog_snapshot_position 945
SHOW MASTER STATUS;
File Position Binlog_Do_DB Binlog_Ignore_DB
-master-bin.000002 286
+master-bin.000002 326
COMMIT;
SHOW STATUS LIKE 'binlog_snapshot_%';
Variable_name Value
binlog_snapshot_file master-bin.000002
-binlog_snapshot_position 286
+binlog_snapshot_position 326
SHOW MASTER STATUS;
File Position Binlog_Do_DB Binlog_Ignore_DB
-master-bin.000002 286
+master-bin.000002 326
SHOW BINLOG EVENTS;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 4 Format_desc 1 246 Server ver: #, Binlog ver: #
diff --git a/mysql-test/suite/innodb/r/group_commit_binlog_pos.result b/mysql-test/suite/innodb/r/group_commit_binlog_pos.result
index 29aa765c1b4..ccf458809d8 100644
--- a/mysql-test/suite/innodb/r/group_commit_binlog_pos.result
+++ b/mysql-test/suite/innodb/r/group_commit_binlog_pos.result
@@ -1,3 +1,4 @@
+SET GLOBAL innodb_flush_log_at_trx_commit=3;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
diff --git a/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result b/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result
index 3c3b0709331..44cf2f3979d 100644
--- a/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result
+++ b/mysql-test/suite/innodb/r/group_commit_binlog_pos_no_optimize_thread.result
@@ -1,3 +1,4 @@
+SET GLOBAL innodb_flush_log_at_trx_commit=3;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
diff --git a/mysql-test/suite/innodb/t/group_commit_binlog_pos.test b/mysql-test/suite/innodb/t/group_commit_binlog_pos.test
index 72798a68a1e..213dbc9d3d8 100644
--- a/mysql-test/suite/innodb/t/group_commit_binlog_pos.test
+++ b/mysql-test/suite/innodb/t/group_commit_binlog_pos.test
@@ -17,6 +17,19 @@
# Test that we get the correct position when we group commit several
# transactions together.
+# What we really want to test here is what happens when a group of
+# transactions get written only partially to disk inside InnoDB before
+# the crash. But that is hard to test in mysql-test-run automated
+# tests. Instead, we use debug_sync to tightly control when each
+# transaction is written to the redo log. And we set
+# innodb_flush_log_at_trx_commit=3 so that we can write out
+# transactions individually - because with
+# innodb_flush_log_at_trx_commit=1, all commits are written together,
+# as part of a commit_checkpoint.
+# (Note that we do not have to restore innodb_flush_log_at_trx_commit, as
+# we crash the server).
+SET GLOBAL innodb_flush_log_at_trx_commit=3;
+
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
diff --git a/mysql-test/suite/innodb/t/group_commit_binlog_pos_no_optimize_thread.test b/mysql-test/suite/innodb/t/group_commit_binlog_pos_no_optimize_thread.test
index e9a234577e2..3ae3c50085d 100644
--- a/mysql-test/suite/innodb/t/group_commit_binlog_pos_no_optimize_thread.test
+++ b/mysql-test/suite/innodb/t/group_commit_binlog_pos_no_optimize_thread.test
@@ -17,6 +17,19 @@
# Test that we get the correct position when we group commit several
# transactions together.
+# What we really want to test here is what happens when a group of
+# transactions get written only partially to disk inside InnoDB before
+# the crash. But that is hard to test in mysql-test-run automated
+# tests. Instead, we use debug_sync to tightly control when each
+# transaction is written to the redo log. And we set
+# innodb_flush_log_at_trx_commit=3 so that we can write out
+# transactions individually - because with
+# innodb_flush_log_at_trx_commit=1, all commits are written together,
+# as part of a commit_checkpoint.
+# (Note that we do not have to restore innodb_flush_log_at_trx_commit, as
+# we crash the server).
+SET GLOBAL innodb_flush_log_at_trx_commit=3;
+
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
diff --git a/mysql-test/suite/rpl/r/rpl_checksum.result b/mysql-test/suite/rpl/r/rpl_checksum.result
index 9e561908a7b..fb61f159c80 100644
--- a/mysql-test/suite/rpl/r/rpl_checksum.result
+++ b/mysql-test/suite/rpl/r/rpl_checksum.result
@@ -71,7 +71,7 @@ insert into t1 values (1) /* will not be applied on slave due to simulation */;
set @@global.debug_dbug='d,simulate_slave_unaware_checksum';
start slave;
include/wait_for_slave_io_error.inc [errno=1236]
-Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 286, the last event read from 'master-bin.000010' at 246, the last byte read from 'master-bin.000010' at 246.''
+Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 326, the last event read from 'master-bin.000010' at 246, the last byte read from 'master-bin.000010' at 246.''
select count(*) as zero from t1;
zero
0
diff --git a/mysql-test/suite/rpl/r/rpl_insert_delayed,stmt.rdiff b/mysql-test/suite/rpl/r/rpl_insert_delayed,stmt.rdiff
index 5e0e7db5b63..44d8a305f61 100644
--- a/mysql-test/suite/rpl/r/rpl_insert_delayed,stmt.rdiff
+++ b/mysql-test/suite/rpl/r/rpl_insert_delayed,stmt.rdiff
@@ -36,7 +36,7 @@
a
1
On slave
-+show binlog events in 'slave-bin.000002' from <binlog_start> limit 1,6;
++show binlog events in 'slave-bin.000002' from <binlog_start> limit 2,6;
+Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Query # # BEGIN
+slave-bin.000002 # Query # # use `test`; INSERT IGNORE INTO t1 VALUES(1)
diff --git a/mysql-test/suite/rpl/r/rpl_mariadb_slave_capability.result b/mysql-test/suite/rpl/r/rpl_mariadb_slave_capability.result
index 9af3d4bbfd2..8a068ad8d72 100644
--- a/mysql-test/suite/rpl/r/rpl_mariadb_slave_capability.result
+++ b/mysql-test/suite/rpl/r/rpl_mariadb_slave_capability.result
@@ -54,7 +54,7 @@ master-bin.000002 # Query # # COMMIT
SELECT * FROM t1;
a
2
-show relaylog events in 'slave-relay-bin.000005' from <binlog_start> limit 4,5;
+show relaylog events in 'slave-relay-bin.000005' from <binlog_start> limit 5,5;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000005 # Query # # BEGIN
slave-relay-bin.000005 # Query # # # Dummy ev
diff --git a/mysql-test/suite/rpl/r/rpl_row_log.result b/mysql-test/suite/rpl/r/rpl_row_log.result
index b9be2cd0144..13938762991 100644
--- a/mysql-test/suite/rpl/r/rpl_row_log.result
+++ b/mysql-test/suite/rpl/r/rpl_row_log.result
@@ -205,6 +205,7 @@ master-bin.000001 # Query # # COMMIT
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
master-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
master-bin.000002 # Query # # BEGIN
@@ -236,6 +237,7 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
slave-bin.000001 # Rotate # # slave-bin.000002;pos=4
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
slave-bin.000002 # Query # # BEGIN
slave-bin.000002 # Table_map # # table_id: # (test.t2)
diff --git a/mysql-test/suite/rpl/r/rpl_row_log_innodb.result b/mysql-test/suite/rpl/r/rpl_row_log_innodb.result
index 15aa8f23b55..c9489a3dc66 100644
--- a/mysql-test/suite/rpl/r/rpl_row_log_innodb.result
+++ b/mysql-test/suite/rpl/r/rpl_row_log_innodb.result
@@ -205,6 +205,7 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=InnoDB
master-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=InnoDB
master-bin.000002 # Query # # BEGIN
@@ -236,6 +237,7 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=InnoDB
slave-bin.000001 # Rotate # # slave-bin.000002;pos=4
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=InnoDB
slave-bin.000002 # Query # # BEGIN
slave-bin.000002 # Table_map # # table_id: # (test.t2)
diff --git a/mysql-test/suite/rpl/r/rpl_row_show_relaylog_events.result b/mysql-test/suite/rpl/r/rpl_row_show_relaylog_events.result
index 8534bf00711..a6d691f420e 100644
--- a/mysql-test/suite/rpl/r/rpl_row_show_relaylog_events.result
+++ b/mysql-test/suite/rpl/r/rpl_row_show_relaylog_events.result
@@ -128,14 +128,16 @@ DROP TABLE t1;
******** [master] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@@ -156,14 +158,16 @@ master-bin.000001 # Rotate # # master-bin.000002;pos=4
******** [slave] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
-slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
+slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@@ -186,6 +190,7 @@ show relaylog events in 'slave-relay-bin.000006' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS IN <FILE> LIMIT 1 ********
@@ -196,8 +201,8 @@ slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
show relaylog events in 'slave-relay-bin.000006' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
-slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS ********
show relaylog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
diff --git a/mysql-test/suite/rpl/r/rpl_stm_log.result b/mysql-test/suite/rpl/r/rpl_stm_log.result
index 3bb3f347a43..ea4fc259b14 100644
--- a/mysql-test/suite/rpl/r/rpl_stm_log.result
+++ b/mysql-test/suite/rpl/r/rpl_stm_log.result
@@ -205,6 +205,7 @@ master-bin.000001 # Query # # COMMIT
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
master-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
master-bin.000002 # Query # # BEGIN
@@ -235,6 +236,7 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
slave-bin.000001 # Rotate # # slave-bin.000002;pos=4
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
slave-bin.000002 # Query # # BEGIN
slave-bin.000002 # Query # # use `test`; insert into t2 values (1)
diff --git a/mysql-test/suite/rpl/r/rpl_stm_mix_show_relaylog_events.result b/mysql-test/suite/rpl/r/rpl_stm_mix_show_relaylog_events.result
index a978c3c900c..2c93a15a7b3 100644
--- a/mysql-test/suite/rpl/r/rpl_stm_mix_show_relaylog_events.result
+++ b/mysql-test/suite/rpl/r/rpl_stm_mix_show_relaylog_events.result
@@ -113,14 +113,16 @@ DROP TABLE t1;
******** [master] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
+master-bin.000002 # Binlog_checkpoint # # master-bin.000002
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@@ -138,14 +140,16 @@ master-bin.000001 # Rotate # # master-bin.000002;pos=4
******** [slave] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
-slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
+slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@@ -165,6 +169,7 @@ show relaylog events in 'slave-relay-bin.000006' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS IN <FILE> LIMIT 1 ********
@@ -175,8 +180,8 @@ slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
show relaylog events in 'slave-relay-bin.000006' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
+slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
-slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS ********
show relaylog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
diff --git a/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test b/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test
index 36f4defa252..251136a2fe1 100644
--- a/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test
+++ b/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test
@@ -61,7 +61,7 @@ connection slave;
SELECT * FROM t1;
let $binlog_file= query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1);
let $binlog_start= 0;
-let $binlog_limit=4,5;
+let $binlog_limit=5,5;
--source include/show_relaylog_events.inc
--echo # Test that slave which cannot tolerate holes in binlog stream but
diff --git a/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result b/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result
index 441fb4cd362..268d40c1be3 100644
--- a/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_flush_log_at_trx_commit_basic.result
@@ -50,7 +50,7 @@ Warnings:
Warning 1292 Truncated incorrect innodb_flush_log_at_trx_commit value: '1001'
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
-2
+3
'#----------------------FN_DYNVARS_046_05------------------------#'
SELECT @@global.innodb_flush_log_at_trx_commit =
VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
@@ -60,22 +60,22 @@ VARIABLE_VALUE
1
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
-2
+3
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_flush_log_at_trx_commit';
VARIABLE_VALUE
-2
+3
'#---------------------FN_DYNVARS_046_06-------------------------#'
SET @@global.innodb_flush_log_at_trx_commit = OFF;
ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_trx_commit'
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
-2
+3
SET @@global.innodb_flush_log_at_trx_commit = ON;
ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_trx_commit'
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
-2
+3
'#---------------------FN_DYNVARS_046_07----------------------#'
SET @@global.innodb_flush_log_at_trx_commit = TRUE;
SELECT @@global.innodb_flush_log_at_trx_commit;
diff --git a/sql/handler.cc b/sql/handler.cc
index e7eddef55ed..3f201b266f0 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -644,6 +644,43 @@ void ha_checkpoint_state(bool disable)
}
+struct st_commit_checkpoint_request { /* Arguments relayed through plugin_foreach() to commit_checkpoint_request_handlerton(). */
+ void *cookie; /* Opaque value given to pre_hook and to each engine's commit_checkpoint_request(). */
+ void (*pre_hook)(void *); /* Optional; when non-NULL it is called with cookie before each engine request. */
+};
+
+/*
+  plugin_foreach() callback used by ha_commit_checkpoint_request().
+
+  data points to a st_commit_checkpoint_request. For an enabled engine that
+  implements commit_checkpoint_request(), the optional pre_hook is run first
+  and then the engine method is invoked, both with the request's cookie.
+  Always returns FALSE.
+*/
+static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
+                                                    void *data)
+{
+  handlerton *engine= plugin_data(plugin, handlerton *);
+  st_commit_checkpoint_request *request=
+    static_cast<st_commit_checkpoint_request *>(data);
+
+  /* Engines that are not enabled, or that lack the method, are skipped. */
+  if (engine->state != SHOW_OPTION_YES || !engine->commit_checkpoint_request)
+    return FALSE;
+
+  void *cookie= request->cookie;
+  if (request->pre_hook)
+    request->pre_hook(cookie);
+  engine->commit_checkpoint_request(engine, cookie);
+  return FALSE;
+}
+
+
+/*
+  Ask every storage engine that implements commit_checkpoint_request() to
+  perform a commit checkpoint, handing each of them the same cookie.
+
+  A non-NULL pre_hook is run with the cookie just before each engine is asked.
+*/
+void
+ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *))
+{
+  /* Field order is { cookie, pre_hook }. */
+  st_commit_checkpoint_request request= { cookie, pre_hook };
+  plugin_foreach(NULL, commit_checkpoint_request_handlerton,
+                 MYSQL_STORAGE_ENGINE_PLUGIN, &request);
+}
+
+
static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
void *unused)
@@ -1281,6 +1318,7 @@ int ha_commit_trans(THD *thd, bool all)
goto done;
}
+ DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order");
cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
need_commit_ordered);
if (!cookie)
@@ -1778,6 +1816,17 @@ bool mysql_xa_recover(THD *thd)
DBUG_RETURN(0);
}
+/*
+  Entry point for a storage engine to report that a commit checkpoint
+  requested through the handlerton method commit_checkpoint_request() has
+  been reached (see the comments on that method for the full protocol).
+
+  The cookie is forwarded unchanged to tc_log; hton is not used.
+*/
+void commit_checkpoint_notify_ha(handlerton *hton, void *cookie)
+{
+  (void) hton;
+  tc_log->commit_checkpoint_notify(cookie);
+}
+
+
/**
@details
This function should be called when MySQL sends rows of a SELECT result set
diff --git a/sql/handler.h b/sql/handler.h
index 9c0850e157a..b6052412069 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -976,6 +976,46 @@ struct handlerton
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);
+ /*
+ The commit_checkpoint_request() handlerton method is used to checkpoint
+ the XA recovery process for storage engines that support two-phase
+ commit.
+
+ The method is optional - an engine that does not implement it is expected
+ to work the traditional way, where every commit() durably flushes the
+ transaction to disk in the engine before completion, so XA recovery will
+ no longer be needed for that transaction.
+
+ An engine that does implement commit_checkpoint_request() is also
+ expected to implement commit_ordered(), so that ordering of commits is
+ consistent between 2pc participants. Such engine is no longer required to
+ durably flush to disk transactions in commit(), provided that the
+ transaction has been successfully prepare()d and commit_ordered(); thus
+ potentially saving one fsync() call. (Engine must still durably flush
+ to disk in commit() when no prepare()/commit_ordered() steps took place,
+ at least if durable commits are wanted; this happens eg. if binlog is
+ disabled).
+
+ The TC will periodically (eg. once per binlog rotation) call
+ commit_checkpoint_request(). When this happens, the engine must arrange
+ for all transactions that have completed commit_ordered() to be durably
+ flushed to disk (this does not include transactions that might be in the
+ middle of executing commit_ordered()). When such flush has completed, the
+ engine must call commit_checkpoint_notify_ha(), passing back the opaque
+ "cookie".
+
+ The flush and call of commit_checkpoint_notify_ha() need not happen
+ immediately - it can be scheduled and performed asynchronously (ie. as
+ part of next prepare(), or sync every second, or whatever), but should
+ not be postponed indefinitely. It is however also permissible to do it
+ immediately, before returning from commit_checkpoint_request().
+
+ When commit_checkpoint_notify_ha() is called, the TC will know that the
+ transactions are durably committed, and thus no longer require XA
+ recovery. It uses that to reduce the work needed for any subsequent XA
+ recovery process.
+ */
+ void (*commit_checkpoint_request)(handlerton *hton, void *cookie);
/*
"Disable or enable checkpointing internal to the storage engine. This is
used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that
@@ -2977,6 +3017,7 @@ void ha_close_connection(THD* thd);
bool ha_flush_logs(handlerton *db_type);
void ha_drop_database(char* path);
void ha_checkpoint_state(bool disable);
+void ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *));
int ha_create_table(THD *thd, const char *path,
const char *db, const char *table_name,
HA_CREATE_INFO *create_info,
@@ -3057,6 +3098,7 @@ int ha_binlog_end(THD *thd);
const char *get_canonical_filename(handler *file, const char *path,
char *tmp_path);
bool mysql_xa_recover(THD *thd);
+void commit_checkpoint_notify_ha(handlerton *hton, void *cookie);
inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
{
diff --git a/sql/log.cc b/sql/log.cc
index 430f0a0bf60..50c66135f24 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -479,7 +479,14 @@ public:
*/
bool using_xa;
my_xid xa_xid;
- ulong cookie;
+ bool need_unlog;
+ /*
+ Id of binlog that transaction was written to; only needed if need_unlog is
+ true.
+ */
+ ulong binlog_id;
+ /* Set if we get an error during commit that must be returned from unlog(). */
+ bool delayed_error;
private:
@@ -1678,8 +1685,7 @@ binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
So there is no work to do. Therefore, we will not increment any XID
count, so we must not decrement any XID count in unlog().
*/
- if (cache_mngr->using_xa && cache_mngr->xa_xid)
- cache_mngr->cookie= BINLOG_COOKIE_DUMMY;
+ cache_mngr->need_unlog= 0;
}
cache_mngr->reset(using_stmt, using_trx);
@@ -2904,16 +2910,16 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
- :current_binlog_id(BINLOG_COOKIE_START), reset_master_pending(false),
+ :reset_master_pending(false),
bytes_written(0), file_id(1), open_count(1),
- need_start_event(TRUE),
group_commit_queue(0), group_commit_queue_busy(FALSE),
num_commits(0), num_group_commits(0),
sync_period_ptr(sync_period), sync_counter(0),
is_relay_log(0), signal_cnt(0),
checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
- description_event_for_exec(0), description_event_for_queue(0)
+ description_event_for_exec(0), description_event_for_queue(0),
+ current_binlog_id(0)
{
/*
We don't want to initialize locks here as such initialization depends on
@@ -2963,10 +2969,9 @@ void MYSQL_BIN_LOG::cleanup()
/* Init binlog-specific vars */
-void MYSQL_BIN_LOG::init(bool no_auto_events_arg, ulong max_size_arg)
+void MYSQL_BIN_LOG::init(ulong max_size_arg)
{
DBUG_ENTER("MYSQL_BIN_LOG::init");
- no_auto_events= no_auto_events_arg;
max_size= max_size_arg;
DBUG_PRINT("info",("max_size: %lu", max_size));
DBUG_VOID_RETURN;
@@ -3070,12 +3075,12 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
enum_log_type log_type_arg,
const char *new_name,
enum cache_type io_cache_type_arg,
- bool no_auto_events_arg,
ulong max_size_arg,
bool null_created_arg,
bool need_mutex)
{
File file= -1;
+ xid_count_per_binlog *new_xid_list_entry= NULL, *b;
DBUG_ENTER("MYSQL_BIN_LOG::open");
DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
@@ -3131,7 +3136,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
DBUG_RETURN(1); /* all warnings issued */
}
- init(no_auto_events_arg, max_size_arg);
+ init(max_size_arg);
open_count++;
@@ -3155,11 +3160,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
write_file_name_to_index_file= 1;
}
- if (need_start_event && !no_auto_events)
{
/*
- In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
- even if this is not the very first binlog.
+ In 4.x we put Start event only in the first binlog. But from 5.0 we
+ want a Start event even if this is not the very first binlog.
*/
Format_description_log_event s(BINLOG_VERSION);
/*
@@ -3191,42 +3195,51 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
{
char buf[FN_REFLEN];
/*
- Put this one into the list of active binlogs.
+ Construct an entry in the binlog_xid_count_list for the new binlog
+ file (we will not link it into the list until we know the new file
+ is successfully created; otherwise we would have to remove it again
+ if creation failed, which gets tricky since other threads may have
+ seen the entry in the meantime - and we do not want to hold
+ LOCK_xid_list for long periods of time).
+
Write the current binlog checkpoint into the log, so XA recovery will
know from where to start recovery.
*/
uint off= dirname_length(log_file_name);
uint len= strlen(log_file_name) - off;
char *entry_mem, *name_mem;
- xid_count_per_binlog *b, *b2;
- if (!(b = (xid_count_per_binlog *)
+ if (!(new_xid_list_entry = (xid_count_per_binlog *)
my_multi_malloc(MYF(MY_WME),
&entry_mem, sizeof(xid_count_per_binlog),
&name_mem, len,
NULL)))
goto err;
memcpy(name_mem, log_file_name+off, len);
- b->binlog_name= name_mem;
- b->binlog_name_len= len;
- b->xid_count= 0;
-
- mysql_mutex_lock(&LOCK_xid_list);
- b->binlog_id= ++current_binlog_id;
+ new_xid_list_entry->binlog_name= name_mem;
+ new_xid_list_entry->binlog_name_len= len;
+ new_xid_list_entry->xid_count= 0;
/*
- Remove any initial entries with no pending XIDs.
- Normally this will be done in unlog(), but if there are no
- transactions with an XA-capable engine at all in a given binlog
- file, unlog() will never be used and we will remove the entry here.
- */
- while ((b2= binlog_xid_count_list.head()) && b2->xid_count == 0)
- my_free(binlog_xid_count_list.get());
+ Find the name for the Initial binlog checkpoint.
- binlog_xid_count_list.push_back(b);
- b2= binlog_xid_count_list.head();
- strmake(buf, b2->binlog_name, b2->binlog_name_len);
+ Normally this will just be the first entry, as we delete entries
+ when their count drops to zero. But we scan the list to handle any
+ corner case, eg. for the first binlog file opened after startup, the
+ list will be empty.
+ */
+ mysql_mutex_lock(&LOCK_xid_list);
+ I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
+ while ((b= it++) && b->xid_count == 0)
+ ;
mysql_mutex_unlock(&LOCK_xid_list);
+ if (!b)
+ b= new_xid_list_entry;
+ strmake(buf, b->binlog_name, b->binlog_name_len);
Binlog_checkpoint_log_event ev(buf, len);
+ DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
+ flush_io_cache(&log_file);
+ mysql_file_sync(log_file.file, MYF(MY_WME));
+ DBUG_SUICIDE(););
if (ev.write(&log_file))
goto err;
bytes_written+= ev.data_written;
@@ -3302,6 +3315,23 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
#endif
}
}
+
+ if (!is_relay_log)
+ {
+ /*
+ Now the file was created successfully, so we can link in the entry for
+ the new binlog file in binlog_xid_count_list.
+ */
+ mysql_mutex_lock(&LOCK_xid_list);
+ ++current_binlog_id;
+ new_xid_list_entry->binlog_id= current_binlog_id;
+ /* Remove any initial entries with no pending XIDs. */
+ while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
+ my_free(binlog_xid_count_list.get());
+ binlog_xid_count_list.push_back(new_xid_list_entry);
+ mysql_mutex_unlock(&LOCK_xid_list);
+ }
+
log_state= LOG_OPENED;
#ifdef HAVE_REPLICATION
@@ -3320,6 +3350,8 @@ err:
Turning logging off for the whole duration of the MySQL server process. \
To turn it on again: fix the cause, \
shutdown the MySQL server and restart it.", name, errno);
+ if (new_xid_list_entry)
+ my_free(new_xid_list_entry);
if (file >= 0)
mysql_file_close(file, MYF(0));
close(LOG_CLOSE_INDEX);
@@ -3599,12 +3631,40 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd)
if (!is_relay_log)
{
/*
+ Mark that a RESET MASTER is in progress.
+ This ensures that a binlog checkpoint will not try to write binlog
+ checkpoint events, which would be useless (as we are deleting the binlog
+ anyway) and could deadlock, as we are holding LOCK_log.
+ */
+ mysql_mutex_lock(&LOCK_xid_list);
+ reset_master_pending= true;
+ mysql_mutex_unlock(&LOCK_xid_list);
+
+ /*
We are going to nuke all binary log files.
- So first wait until all pending binlog checkpoints have completed.
+ Without binlog, we cannot XA recover prepared-but-not-committed
+ transactions in engines. So force a commit checkpoint first.
+
+ Note that we take and immediately release LOCK_commit_ordered. This has
+ the effect to ensure that any on-going group commit (in
+ trx_group_commit_leader()) has completed before we request the checkpoint,
+ due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
+ (We are holding LOCK_log, so no new group commit can start).
+
+ Without this, it is possible (though perhaps unlikely) that the RESET
+ MASTER could run in-between the write to the binlog and the
+ commit_ordered() in the engine of some transaction, and then a crash
+ later would leave such transaction not recoverable.
*/
+ mysql_mutex_lock(&LOCK_commit_ordered);
+ mysql_mutex_unlock(&LOCK_commit_ordered);
+
+ mark_xids_active(current_binlog_id, 1);
+ do_checkpoint_request(current_binlog_id);
+
+ /* Now wait for all checkpoint requests and pending unlog() to complete. */
mysql_mutex_lock(&LOCK_xid_list);
xid_count_per_binlog *b;
- reset_master_pending= true;
for (;;)
{
I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
@@ -3626,9 +3686,6 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd)
Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
no new ones will be written. So we can proceed to delete the logs.
*/
- while ((b= binlog_xid_count_list.get()))
- my_free(b);
- reset_master_pending= false;
mysql_mutex_unlock(&LOCK_xid_list);
}
@@ -3722,10 +3779,8 @@ bool MYSQL_BIN_LOG::reset_logs(THD* thd)
goto err;
}
}
- if (!thd->slave_thread)
- need_start_event=1;
if (!open_index_file(index_file_name, 0, FALSE))
- if ((error= open(save_name, log_type, 0, io_cache_type, no_auto_events, max_size, 0, FALSE)))
+ if ((error= open(save_name, log_type, 0, io_cache_type, max_size, 0, FALSE)))
goto err;
my_free((void *) save_name);
@@ -3733,6 +3788,31 @@ err:
if (error == 1)
name= const_cast<char*>(save_name);
mysql_mutex_unlock(&LOCK_thread_count);
+
+ if (!is_relay_log)
+ {
+ xid_count_per_binlog *b;
+ /*
+ Remove all entries in the xid_count list except the last.
+ Normally we will just be deleting all the entries that we waited for to
+ drop to zero above. But if we fail during RESET MASTER for some reason
+ then we will not have created any new log file, and we may keep the last
+ of the old entries.
+ */
+ mysql_mutex_lock(&LOCK_xid_list);
+ for (;;)
+ {
+ b= binlog_xid_count_list.head();
+ DBUG_ASSERT(b /* List can never become empty. */);
+ if (b->binlog_id == current_binlog_id)
+ break;
+ DBUG_ASSERT(b->xid_count == 0);
+ my_free(binlog_xid_count_list.get());
+ }
+ reset_master_pending= false;
+ mysql_mutex_unlock(&LOCK_xid_list);
+ }
+
mysql_mutex_unlock(&LOCK_index);
mysql_mutex_unlock(&LOCK_log);
DBUG_RETURN(error);
@@ -4476,7 +4556,6 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
if (log_type == LOG_BIN)
{
- if (!no_auto_events)
{
/*
We log the whole file name for log file as the user may decide
@@ -4551,7 +4630,7 @@ int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
/* reopen the binary log file. */
file_to_open= new_name_ptr;
error= open(old_name, log_type, new_name_ptr, io_cache_type,
- no_auto_events, max_size, 1, FALSE);
+ max_size, 1, FALSE);
}
/* handle reopening errors */
@@ -5176,6 +5255,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
bool is_trans_cache= FALSE;
bool using_trans= event_info->use_trans_cache();
bool direct= event_info->use_direct_logging();
+ ulong prev_binlog_id;
+ LINT_INIT(prev_binlog_id);
if (thd->binlog_evt_union.do_union)
{
@@ -5227,6 +5308,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
file= &log_file;
my_org_b_tell= my_b_tell(file);
mysql_mutex_lock(&LOCK_log);
+ prev_binlog_id= current_binlog_id;
}
else
{
@@ -5372,7 +5454,7 @@ err:
mysql_mutex_unlock(&LOCK_log);
if (check_purge)
- purge();
+ checkpoint_and_purge(prev_binlog_id);
}
if (error)
@@ -5457,6 +5539,64 @@ bool general_log_write(THD *thd, enum enum_server_command command,
return FALSE;
}
+
+/*
+  Pre-hook handed to ha_commit_checkpoint_request() by
+  MYSQL_BIN_LOG::do_checkpoint_request(); cookie is the xid_count_per_binlog
+  entry being checkpointed.
+
+  This function would ideally be static, but that causes compiler warnings
+  when it is declared as a friend function in log.h.
+*/
+void
+binlog_checkpoint_callback(void *cookie)
+{
+  /*
+    Each supporting engine gets one xid_count increment followed by one
+    commit_checkpoint_request(). Counting the matching
+    commit_checkpoint_notify() callbacks then tells us when all of them have
+    occurred, at which point a new binlog checkpoint event can be logged.
+  */
+  MYSQL_BIN_LOG::xid_count_per_binlog *binlog_entry=
+    static_cast<MYSQL_BIN_LOG::xid_count_per_binlog *>(cookie);
+  mysql_bin_log.mark_xids_active(binlog_entry->binlog_id, 1);
+}
+
+
+/*
+ Request a commit checkpoint for binlog_id from each supporting engine.
+ This must be called after each binlog rotate, and after LOCK_log has been
+ released. The xid_count value in the xid_count_per_binlog entry was
+ incremented by 1 and will be decremented in this function; this ensures
+ that the entry will not go away early despite LOCK_log not being held.
+*/
+void
+MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id)
+{
+ xid_count_per_binlog *entry;
+
+ /*
+ Find the binlog entry, and invoke commit_checkpoint_request() on it in
+ each supporting storage engine.
+ */
+ mysql_mutex_lock(&LOCK_xid_list);
+ I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
+ do {
+ entry= it++;
+ DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. */);
+ } while (entry->binlog_id != binlog_id);
+ mysql_mutex_unlock(&LOCK_xid_list); /* entry is used after unlock; the caller's xid_count increment keeps it in the list. */
+
+ ha_commit_checkpoint_request(entry, binlog_checkpoint_callback); /* The callback adds 1 to xid_count before each engine is asked. */
+ /*
+ When we rotated the binlog, we incremented xid_count to make sure the
+ entry would not go away until this point, where we have done all necessary
+ commit_checkpoint_request() calls.
+ So now we can (and must) decrease the count - when it reaches zero, we
+ will know that both all pending unlog() and all pending
+ commit_checkpoint_notify() calls are done, and we can log a new binlog
+ checkpoint.
+ */
+ mark_xid_done(binlog_id, true); /* Releases the count taken by the caller at rotate time. */
+}
+
+
/**
The method executes rotation when LOCK_log is already acquired
by the caller.
@@ -5465,6 +5605,15 @@ bool general_log_write(THD *thd, enum enum_server_command command,
@param check_purge is set to true if rotation took place
@note
+ Caller _must_ check the check_purge variable. If this is set, it means
+ that the binlog was rotated, and caller _must_ ensure that
+ do_checkpoint_request() is called later with the binlog_id of the rotated
+ binlog file. The call to do_checkpoint_request() must happen after
+ LOCK_log is released (which is why we cannot simply do it here).
+ Usually, checkpoint_and_purge() is appropriate, as it will both handle
+ the checkpointing and any needed purging of old logs.
+
+ @note
If rotation fails, for instance the server was unable
to create a new log file, we still try to write an
incident event to the current log.
@@ -5482,7 +5631,27 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
{
+ ulong binlog_id= current_binlog_id;
+ /*
+ We rotate the binlog, so we need to start a commit checkpoint in all
+ supporting engines - when it finishes, we can log a new binlog checkpoint
+ event.
+
+ But we cannot start the checkpoint here - there could be a group commit
+ still in progress which needs to be included in the checkpoint, and
+ besides we do not want to do the (possibly expensive) checkpoint while
+ LOCK_log is held.
+
+ On the other hand, we must be sure that the xid_count entry for the
+ previous log does not go away until we start the checkpoint - which it
+ could do as it is no longer the most recent. So we increment xid_count
+ (to count the pending checkpoint request) - this will fix the entry in
+ place until we decrement again in do_checkpoint_request().
+ */
+ mark_xids_active(binlog_id, 1);
+
if ((error= new_file_without_locking()))
+ {
/**
Be conservative... There are possible lost events (eg,
failing to log the Execute_load_query_log_event
@@ -5495,7 +5664,14 @@ int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
if (!write_incident_already_locked(current_thd))
flush_and_sync(0);
- *check_purge= true;
+ /*
+ We failed to rotate - so we have to decrement the xid_count back that
+ we incremented before attempting the rotate.
+ */
+ mark_xid_done(binlog_id, false);
+ }
+ else
+ *check_purge= true;
}
DBUG_RETURN(error);
}
@@ -5523,6 +5699,13 @@ void MYSQL_BIN_LOG::purge()
#endif
}
+
+void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id) /* Request a commit checkpoint for binlog_id, then purge old logs. */
+{
+ do_checkpoint_request(binlog_id); /* Per its own comment, must run after LOCK_log has been released. */
+ purge();
+}
+
/**
The method is a shortcut of @c rotate() and @c purge().
LOCK_log is acquired prior to rotate and is released after it.
@@ -5535,11 +5718,13 @@ void MYSQL_BIN_LOG::purge()
int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate)
{
int error= 0;
+ ulong prev_binlog_id;
DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
bool check_purge= false;
//todo: fix the macro def and restore safe_mutex_assert_not_owner(&LOCK_log);
mysql_mutex_lock(&LOCK_log);
+ prev_binlog_id= current_binlog_id;
if ((error= rotate(force_rotate, &check_purge)))
check_purge= false;
/*
@@ -5549,7 +5734,7 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate)
mysql_mutex_unlock(&LOCK_log);
if (check_purge)
- purge();
+ checkpoint_and_purge(prev_binlog_id);
DBUG_RETURN(error);
}
@@ -5880,11 +6065,13 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd)
uint error= 0;
my_off_t offset;
bool check_purge= false;
+ ulong prev_binlog_id;
DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
mysql_mutex_lock(&LOCK_log);
if (likely(is_open()))
{
+ prev_binlog_id= current_binlog_id;
if (!(error= write_incident_already_locked(thd)) &&
!(error= flush_and_sync(0)))
{
@@ -5904,7 +6091,7 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd)
mysql_mutex_unlock(&LOCK_log);
if (check_purge)
- purge();
+ checkpoint_and_purge(prev_binlog_id);
}
DBUG_RETURN(error);
@@ -5914,6 +6101,7 @@ void
MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name,
uint len)
{
+ my_off_t offset;
Binlog_checkpoint_log_event ev(name, len);
/*
Note that we must sync the binlog checkpoint to disk.
@@ -5922,22 +6110,29 @@ MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name,
*/
if (!ev.write(&log_file) && !flush_and_sync(0))
{
- bool check_purge= false;
signal_update();
- rotate(false, &check_purge);
- if (check_purge)
- purge();
- return;
+ }
+ else
+ {
+ /*
+ If we fail to write the checkpoint event, something is probably really
+ bad with the binlog. We complain in the error log.
+
+ Note that failure to write binlog checkpoint does not compromise the
+ ability to do crash recovery - crash recovery will just have to scan a
+ bit more of the binlog than strictly necessary.
+ */
+ sql_print_error("Failed to write binlog checkpoint event to binary log\n");
}
+ offset= my_b_tell(&log_file);
/*
- If we fail to write the checkpoint event, something is probably really
- bad with the binlog. We complain in the error log.
- Note that failure to write binlog checkpoint does not compromise the
- ability to do crash recovery - crash recovery will just have to scan a
- bit more of the binlog than strictly necessary.
+ Take mutex to protect against a reader seeing partial writes of 64-bit
+ offset on 32-bit CPUs.
*/
- sql_print_error("Failed to write binlog checkpoint event to binary log\n");
+ mysql_mutex_lock(&LOCK_commit_ordered);
+ last_commit_pos_offset= offset;
+ mysql_mutex_unlock(&LOCK_commit_ordered);
}
@@ -5973,6 +6168,7 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
bool using_trx_cache)
{
group_commit_entry entry;
+ Ha_trx_info *ha_info;
DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
entry.thd= thd;
@@ -5981,6 +6177,15 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
entry.all= all;
entry.using_stmt_cache= using_stmt_cache;
entry.using_trx_cache= using_trx_cache;
+ /*
+   The transaction needs unlog() tracking as soon as one started
+   participant other than the binlog itself does not implement
+   commit_checkpoint_request(). Every participant must be examined: the
+   break belongs inside the if, otherwise the scan stops after the first
+   list entry and a later engine without checkpoint support is missed.
+ */
+ entry.need_unlog= false;
+ ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
+ for (; ha_info; ha_info= ha_info->next())
+ {
+   if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
+       !ha_info->ht()->commit_checkpoint_request)
+   {
+     entry.need_unlog= true;
+     break;
+   }
+ }
/*
Log "BEGIN" at the beginning of every transaction. Here, a transaction is
@@ -6069,6 +6274,18 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
{
next->thd->signal_wakeup_ready();
}
+ else
+ {
+ /*
+ If we rotated the binlog, and if we are using the unoptimized thread
+ scheduling where every thread runs its own commit_ordered(), then we
+ must do the commit checkpoint and log purge here, after all
+ commit_ordered() calls have finished, and locks have been released.
+ */
+ if (entry->check_purge)
+ checkpoint_and_purge(entry->binlog_id);
+ }
+
}
if (likely(!entry->error))
@@ -6099,8 +6316,9 @@ MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
we need to mark it as not needed for recovery (unlog() is not called
for a transaction if log_xid() fails).
*/
- if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid)
- mark_xid_done(entry->cache_mngr->cookie);
+ if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
+ entry->cache_mngr->need_unlog)
+ mark_xid_done(entry->cache_mngr->binlog_id, true);
return 1;
}
@@ -6120,10 +6338,12 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{
uint xid_count= 0;
my_off_t UNINIT_VAR(commit_offset);
- group_commit_entry *current;
+ group_commit_entry *current, *last_in_queue;
group_commit_entry *queue= NULL;
bool check_purge= false;
+ ulong binlog_id;
DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+ LINT_INIT(binlog_id);
DBUG_ASSERT(is_open());
if (likely(is_open())) // Should always be true
@@ -6134,6 +6354,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
*/
mysql_mutex_lock(&LOCK_log);
DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
+ binlog_id= current_binlog_id;
mysql_mutex_lock(&LOCK_prepare_ordered);
current= group_commit_queue;
@@ -6141,6 +6362,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
mysql_mutex_unlock(&LOCK_prepare_ordered);
/* As the queue is in reverse order of entering, reverse it. */
+ last_in_queue= current;
while (current)
{
group_commit_entry *next= current->next;
@@ -6180,8 +6402,22 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
cache_mngr->last_commit_pos_offset= commit_offset;
if (cache_mngr->using_xa && cache_mngr->xa_xid)
{
- xid_count++;
- cache_mngr->cookie= current_binlog_id;
+ /*
+ If all storage engines support commit_checkpoint_request(), then we
+ do not need to keep track of when this XID is durably committed.
+ Instead we will just ask the storage engine to durably commit all its
+ XIDs when we rotate a binlog file.
+ */
+ if (current->need_unlog)
+ {
+ xid_count++;
+ cache_mngr->need_unlog= true;
+ cache_mngr->binlog_id= binlog_id;
+ }
+ else
+ cache_mngr->need_unlog= false;
+
+ cache_mngr->delayed_error= false;
}
}
@@ -6232,21 +6468,27 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
*/
if (xid_count > 0)
{
- mark_xids_active(current_binlog_id, xid_count);
+ mark_xids_active(binlog_id, xid_count);
}
- else
+
+ if (rotate(false, &check_purge))
{
- if (rotate(false, &check_purge))
- {
- /*
- If we fail to rotate, which thread should get the error?
- We give the error to the leader, as any my_error() thrown inside
- rotate() will have been registered for the leader THD.
- */
- leader->error= ER_ERROR_ON_WRITE;
- leader->commit_errno= errno;
- check_purge= false;
- }
+ /*
+ If we fail to rotate, which thread should get the error?
+ We give the error to the leader, as any my_error() thrown inside
+ rotate() will have been registered for the leader THD.
+
+ However we must not return error from here - that would cause
+ ha_commit_trans() to abort and rollback the transaction, which would
+ leave an inconsistent state with the transaction committed in the
+ binlog but rolled back in the engine.
+
+ Instead set a flag so that we can return error later, from unlog(),
+ when the transaction has been safely committed in the engine.
+ */
+ leader->cache_mngr->delayed_error= true;
+ my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno);
+ check_purge= false;
}
}
@@ -6278,6 +6520,15 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
group_commit_queue_busy= TRUE;
+ /*
+ Set these so parent can run checkpoint_and_purge() in last thread.
+ (When using optimized thread scheduling, we run checkpoint_and_purge()
+ in this function, so parent does not need to and we need not set these
+ values).
+ */
+ last_in_queue->check_purge= check_purge;
+ last_in_queue->binlog_id= binlog_id;
+
/* Note that we return with LOCK_commit_ordered locked! */
DBUG_VOID_RETURN;
}
@@ -6308,9 +6559,10 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
}
DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
mysql_mutex_unlock(&LOCK_commit_ordered);
+ DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");
if (check_purge)
- purge();
+ checkpoint_and_purge(binlog_id);
DBUG_VOID_RETURN;
}
@@ -6470,7 +6722,7 @@ void MYSQL_BIN_LOG::close(uint exiting)
if (log_state == LOG_OPENED)
{
#ifdef HAVE_REPLICATION
- if (log_type == LOG_BIN && !no_auto_events &&
+ if (log_type == LOG_BIN &&
(exiting & LOG_CLOSE_STOP_EVENT))
{
Stop_log_event s;
@@ -7104,6 +7356,8 @@ int TC_LOG_MMAP::open(const char *opt_name)
mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint,
+ MY_MUTEX_INIT_FAST);
mysql_cond_init(key_COND_active, &COND_active, 0);
mysql_cond_init(key_COND_pool, &COND_pool, 0);
mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0);
@@ -7356,17 +7610,93 @@ int TC_LOG_MMAP::sync()
return err;
}
+static void
+mmap_do_checkpoint_callback(void *data)
+{
+  /*
+    Hook handed to ha_commit_checkpoint_request() by TC_LOG_MMAP::unlog().
+    `data' is the pending_cookies batch; each call adds one more
+    notification that commit_checkpoint_notify() must see before the batch
+    is deleted.
+    NOTE(review): presumably invoked once per engine that will later send a
+    notify - confirm against ha_commit_checkpoint_request().
+  */
+  TC_LOG_MMAP::pending_cookies *batch=
+    static_cast<TC_LOG_MMAP::pending_cookies *>(data);
+  batch->pending_count+= 1;
+}
+
+int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
+{
+  pending_cookies *batch;
+  bool batch_is_full;
+  DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);
+
+  /*
+    Queue the cookie instead of erasing its entry right away.
+
+    A participating storage engine that implements
+    commit_checkpoint_request() may not yet have made the commit durable,
+    so the entry must survive until the engines have been asked for a
+    checkpoint. Cookies are collected in a batch; once the batch is full a
+    checkpoint is requested and the whole batch is deleted at once.
+
+    Returns 0 on success, 1 if the batch buffer could not be allocated.
+  */
+  mysql_mutex_lock(&LOCK_pending_checkpoint);
+  if (!pending_checkpoint)
+  {
+    uint32 size= sizeof(*pending_checkpoint);
+    pending_checkpoint= (pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL));
+    if (!pending_checkpoint)
+    {
+      my_error(ER_OUTOFMEMORY, MYF(0), size);
+      mysql_mutex_unlock(&LOCK_pending_checkpoint);
+      return 1;
+    }
+  }
+
+  batch= pending_checkpoint;
+  batch->cookies[batch->count]= cookie;
+  batch->count++;
+  batch_is_full= (batch->count ==
+                  sizeof(batch->cookies) / sizeof(batch->cookies[0]));
+  /* A full batch is detached so the next unlog() starts a fresh one. */
+  if (batch_is_full)
+    pending_checkpoint= NULL;
+  mysql_mutex_unlock(&LOCK_pending_checkpoint);
+
+  if (!batch_is_full)
+    return 0;
+
+  /*
+    The extra increment paired with the explicit notify below makes this
+    work even when no engine at all supports commit_checkpoint_request.
+  */
+  ++batch->pending_count;
+  ha_commit_checkpoint_request(batch, mmap_do_checkpoint_callback);
+  commit_checkpoint_notify(batch);
+  return 0;
+}
+
+
+void
+TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
+{
+  /*
+    `cookie' is the pending_cookies batch passed to
+    ha_commit_checkpoint_request() by unlog(). Each call consumes one
+    outstanding notification; the call that brings the counter to zero
+    erases every entry of the batch and frees it.
+  */
+  pending_cookies *batch= static_cast<pending_cookies *>(cookie);
+  uint remaining;
+
+  mysql_mutex_lock(&LOCK_pending_checkpoint);
+  DBUG_ASSERT(batch->pending_count > 0);
+  remaining= --batch->pending_count;
+  mysql_mutex_unlock(&LOCK_pending_checkpoint);
+
+  /* Other notifications are still outstanding; nothing to delete yet. */
+  if (remaining != 0)
+    return;
+
+  const size_t capacity= sizeof(batch->cookies) / sizeof(batch->cookies[0]);
+  for (size_t idx= 0; idx < capacity; ++idx)
+    delete_entry(batch->cookies[idx]);
+  my_free(batch);
+}
+
+
/**
erase xid from the page, update page free space counters/pointers.
cookie points directly to the memory where xid was logged.
*/
-int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
+int TC_LOG_MMAP::delete_entry(ulong cookie)
{
PAGE *p=pages+(cookie/tc_log_page_size);
my_xid *x=(my_xid *)(data+cookie);
- DBUG_ASSERT(*x == xid);
DBUG_ASSERT(x >= p->start && x < p->end);
mysql_mutex_lock(&p->lock);
@@ -7390,6 +7720,7 @@ void TC_LOG_MMAP::close()
mysql_mutex_destroy(&LOCK_sync);
mysql_mutex_destroy(&LOCK_active);
mysql_mutex_destroy(&LOCK_pool);
+ mysql_mutex_destroy(&LOCK_pending_checkpoint);
mysql_cond_destroy(&COND_pool);
mysql_cond_destroy(&COND_active);
mysql_cond_destroy(&COND_queue_busy);
@@ -7412,9 +7743,12 @@ void TC_LOG_MMAP::close()
}
if (inited>=5) // cannot do in the switch because of Windows
mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
+ if (pending_checkpoint)
+ my_free(pending_checkpoint);
inited=0;
}
+
int TC_LOG_MMAP::recover()
{
HASH xids;
@@ -7518,7 +7852,7 @@ int TC_LOG_BINLOG::open(const char *opt_name)
if (using_heuristic_recover())
{
/* generate a new binlog to mask a corrupted one */
- open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0, TRUE);
+ open(opt_name, LOG_BIN, 0, WRITE_CACHE, max_binlog_size, 0, TRUE);
cleanup();
return 1;
}
@@ -7606,9 +7940,6 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
cache_mngr->using_xa= TRUE;
cache_mngr->xa_xid= xid;
-#ifndef DBUG_OFF
- cache_mngr->cookie= 0;
-#endif
err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
DEBUG_SYNC(thd, "binlog_after_log_and_order");
@@ -7619,10 +7950,11 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
If using explicit user XA, we will not have XID. We must still return a
non-zero cookie (as zero cookie signals error).
*/
- if (!xid)
- DBUG_RETURN(BINLOG_COOKIE_DUMMY);
- DBUG_ASSERT(cache_mngr->cookie != 0);
- DBUG_RETURN(cache_mngr->cookie);
+ if (!xid || !cache_mngr->need_unlog)
+ DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
+ else
+ DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
+ cache_mngr->delayed_error));
}
/*
@@ -7637,19 +7969,18 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
binary log.
*/
void
-TC_LOG_BINLOG::mark_xids_active(ulong cookie, uint xid_count)
+TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
{
xid_count_per_binlog *b;
DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
- DBUG_PRINT("info", ("cookie=%lu xid_count=%u", cookie, xid_count));
- DBUG_ASSERT(cookie != 0 && cookie != BINLOG_COOKIE_DUMMY);
+ DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
mysql_mutex_lock(&LOCK_xid_list);
I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
while ((b= it++))
{
- if (b->binlog_id == cookie)
+ if (b->binlog_id == binlog_id)
{
b->xid_count += xid_count;
break;
@@ -7675,15 +8006,13 @@ TC_LOG_BINLOG::mark_xids_active(ulong cookie, uint xid_count)
checkpoint.
*/
void
-TC_LOG_BINLOG::mark_xid_done(ulong cookie)
+TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
{
xid_count_per_binlog *b;
bool first;
ulong current;
DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
- if (cookie == BINLOG_COOKIE_DUMMY)
- DBUG_VOID_RETURN; /* Nothing to do. */
mysql_mutex_lock(&LOCK_xid_list);
current= current_binlog_id;
@@ -7691,7 +8020,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong cookie)
first= true;
while ((b= it++))
{
- if (b->binlog_id == cookie)
+ if (b->binlog_id == binlog_id)
{
--b->xid_count;
break;
@@ -7700,8 +8029,22 @@ TC_LOG_BINLOG::mark_xid_done(ulong cookie)
}
/* Binlog is always found, as we do not remove until count reaches 0 */
DBUG_ASSERT(b);
- if (likely(cookie == current && !reset_master_pending) ||
- b->xid_count != 0 || !first)
+ /*
+ If a RESET MASTER is pending, we are about to remove all log files, and
+ the RESET MASTER thread is waiting for all pending unlog() calls to
+ complete while holding LOCK_log. In this case we should not log a binlog
+ checkpoint event (it would be deleted immediately anyway and we would
+ deadlock on LOCK_log) but just signal the thread.
+ */
+ if (unlikely(reset_master_pending))
+ {
+ mysql_cond_signal(&COND_xid_list);
+ mysql_mutex_unlock(&LOCK_xid_list);
+ DBUG_VOID_RETURN;
+ }
+
+ if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
+ !write_checkpoint)
{
/* No new binlog checkpoint reached yet. */
mysql_mutex_unlock(&LOCK_xid_list);
@@ -7726,40 +8069,27 @@ TC_LOG_BINLOG::mark_xid_done(ulong cookie)
LOCK_log, then re-aquire LOCK_xid_list. If we were to take LOCK_log while
holding LOCK_xid_list, we might deadlock with other threads that take the
locks in the opposite order.
-
- If a RESET MASTER is pending, we are about to remove all log files, and
- the RESET MASTER thread is waiting for all pending unlog() calls to
- complete while holding LOCK_log. In this case we should not log a binlog
- checkpoint event (it would be deleted immediately anywat and we would
- deadlock on LOCK_log) but just signal the thread.
*/
- if (!reset_master_pending)
- {
- mysql_mutex_unlock(&LOCK_xid_list);
- mysql_mutex_lock(&LOCK_log);
- mysql_mutex_lock(&LOCK_xid_list);
- }
+
+ mysql_mutex_unlock(&LOCK_xid_list);
+ mysql_mutex_lock(&LOCK_log);
+ mysql_mutex_lock(&LOCK_xid_list);
+ /* We need to reload current_binlog_id due to release/re-take of lock. */
+ current= current_binlog_id;
+
for (;;)
{
/* Remove initial element(s) with zero count. */
b= binlog_xid_count_list.head();
/*
- Normally, we must not remove all elements in the list.
- Only if a RESET MASTER is in progress may we delete everything - RESET
- MASTER has LOCK_log held, and will create a new initial element before
- releasing the lock.
+ We must not remove all elements in the list - the entry for the current
+ binlog must be present always.
*/
- DBUG_ASSERT(b || reset_master_pending);
- if (unlikely(!b) || b->binlog_id == current || b->xid_count > 0)
+ DBUG_ASSERT(b);
+ if (b->binlog_id == current || b->xid_count > 0)
break;
my_free(binlog_xid_count_list.get());
}
- if (reset_master_pending)
- {
- mysql_cond_signal(&COND_xid_list);
- mysql_mutex_unlock(&LOCK_xid_list);
- DBUG_VOID_RETURN;
- }
mysql_mutex_unlock(&LOCK_xid_list);
write_binlog_checkpoint_event_already_locked(b->binlog_name,
@@ -7771,10 +8101,22 @@ TC_LOG_BINLOG::mark_xid_done(ulong cookie)
int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
{
DBUG_ENTER("TC_LOG_BINLOG::unlog");
- if (xid)
- mark_xid_done(cookie);
- /* As ::write_transaction_to_binlog() did not rotate, do it here. */
- DBUG_RETURN(rotate_and_purge(0));
+ if (!xid)
+ DBUG_RETURN(0);
+
+ if (!BINLOG_COOKIE_IS_DUMMY(cookie))
+ mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
+ /*
+ See comment in trx_group_commit_leader() - if rotate() gave a failure,
+ we delay the return of error code to here.
+
+ The cookie was built by log_and_order() with BINLOG_COOKIE_MAKE() or
+ BINLOG_COOKIE_DUMMY(): bit 0 carries cache_mngr->delayed_error and the
+ remaining bits carry the binlog id (or the dummy id, in which case no
+ xid count is decremented above). The return value is therefore 0 on
+ success and 1 when the delayed error flag was set.
+ */
+ DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
+}
+
+void
+TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
+{
+  /*
+    `cookie' points at the xid_count_per_binlog entry the checkpoint was
+    requested for; release one count on that binlog, allowing a binlog
+    checkpoint event to be written.
+  */
+  xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
+  mark_xid_done(entry->binlog_id, true);
}
int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
@@ -7871,6 +8213,11 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
if (!binlog_checkpoint_found)
break;
first_round= false;
+ DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
+ if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
+ 0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
+ DBUG_SUICIDE();
+ );
if (find_log_pos(linfo, binlog_checkpoint_name, 1))
{
sql_print_error("Binlog file '%s' not found in binlog index, needed "
@@ -7983,10 +8330,13 @@ binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
{
ulong value= *((ulong *)save);
bool check_purge= false;
+ ulong prev_binlog_id;
+ LINT_INIT(prev_binlog_id);
mysql_mutex_lock(mysql_bin_log.get_log_lock());
if(mysql_bin_log.is_open())
{
+ prev_binlog_id= mysql_bin_log.current_binlog_id;
if (binlog_checksum_options != value)
mysql_bin_log.checksum_alg_reset= (uint8) value;
if (mysql_bin_log.rotate(true, &check_purge))
@@ -8000,7 +8350,7 @@ binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
if (check_purge)
- mysql_bin_log.purge();
+ mysql_bin_log.checkpoint_and_purge(prev_binlog_id);
}
diff --git a/sql/log.h b/sql/log.h
index 179d302d2cc..cd1845908ef 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -49,6 +49,7 @@ class TC_LOG
bool need_prepare_ordered,
bool need_commit_ordered) = 0;
virtual int unlog(ulong cookie, my_xid xid)=0;
+ virtual void commit_checkpoint_notify(void *cookie)= 0;
protected:
/*
@@ -98,8 +99,12 @@ public:
return 1;
}
int unlog(ulong cookie, my_xid xid) { return 0; }
+ void commit_checkpoint_notify(void *cookie) { DBUG_ASSERT(0); };
};
+#define TC_LOG_PAGE_SIZE 8192
+#define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE)
+
#ifdef HAVE_MMAP
class TC_LOG_MMAP: public TC_LOG
{
@@ -110,6 +115,12 @@ class TC_LOG_MMAP: public TC_LOG
PS_DIRTY // new xids added since last sync
} PAGE_STATE;
+ struct pending_cookies {
+ uint count; /* number of cookies stored so far in cookies[] */
+ uint pending_count; /* notifications still outstanding before the batch is deleted */
+ ulong cookies[TC_LOG_PAGE_SIZE]; /* cookies passed to unlog(), awaiting delete_entry() */
+ };
+
private:
typedef struct st_page {
struct st_page *next; // page a linked in a fifo queue
@@ -141,7 +152,7 @@ class TC_LOG_MMAP: public TC_LOG
one has to use active->lock.
Same for LOCK_pool and LOCK_sync
*/
- mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
+ mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync, LOCK_pending_checkpoint;
mysql_cond_t COND_pool, COND_active;
/*
Queue of threads that need to call commit_ordered().
@@ -163,14 +174,16 @@ class TC_LOG_MMAP: public TC_LOG
*/
mysql_cond_t COND_queue_busy;
my_bool commit_ordered_queue_busy;
+ pending_cookies* pending_checkpoint;
public:
- TC_LOG_MMAP(): inited(0) {}
+ TC_LOG_MMAP(): inited(0), pending_checkpoint(0) {}
int open(const char *opt_name);
void close();
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
+ void commit_checkpoint_notify(void *cookie);
int recover();
private:
@@ -178,6 +191,7 @@ class TC_LOG_MMAP: public TC_LOG
void get_active_from_pool();
int sync();
int overflow();
+ int delete_entry(ulong cookie);
};
#else
#define TC_LOG_MMAP TC_LOG_DUMMY
@@ -356,12 +370,32 @@ private:
/*
We assign each binlog file an internal ID, used to identify them for unlog().
- Ids start from BINLOG_COOKIE_START; the value BINLOG_COOKIE_DUMMY is special
- meaning "no binlog" (we cannot use zero as that is reserved for error return
- from log_and_order).
-*/
-#define BINLOG_COOKIE_DUMMY 1
-#define BINLOG_COOKIE_START 2
+ The IDs start from 0 and increment for each new binlog created.
+
+ In unlog() we need to know the ID of the binlog file that the corresponding
+ transaction was written into. We also need a special value for a corner
+ case where there is no corresponding binlog id (since nothing was logged).
+ And we need an error flag to mark that unlog() must return failure.
+
+ We use the following macros to pack all of this information into the single
+ ulong available with log_and_order() / unlog().
+
+ Note that we cannot use the value 0 for cookie, as that is reserved as error
+ return value from log_and_order().
+ */
+#define BINLOG_COOKIE_ERROR_RETURN 0 /* error return of log_and_order(); never a valid cookie */
+#define BINLOG_COOKIE_DUMMY_ID 1 /* id part (cookie>>1) meaning "no binlog id" */
+#define BINLOG_COOKIE_BASE 2 /* added to binlog ids so the id part is never 0 or DUMMY_ID */
+#define BINLOG_COOKIE_DUMMY(error_flag) \
+ ( (BINLOG_COOKIE_DUMMY_ID<<1) | ((error_flag)&1) ) /* dummy cookie: evaluates to 2 or 3 */
+#define BINLOG_COOKIE_MAKE(id, error_flag) \
+ ( (((id)+BINLOG_COOKIE_BASE)<<1) | ((error_flag)&1) ) /* id in bits 1.., error flag in bit 0 */
+#define BINLOG_COOKIE_GET_ERROR_FLAG(c) ((c) & 1) /* bit 0: unlog() must return failure */
+#define BINLOG_COOKIE_GET_ID(c) ( ((ulong)(c)>>1) - BINLOG_COOKIE_BASE ) /* inverse of BINLOG_COOKIE_MAKE; not meaningful for a dummy cookie */
+#define BINLOG_COOKIE_IS_DUMMY(c) \
+ ( ((ulong)(c)>>1) == BINLOG_COOKIE_DUMMY_ID ) /* true for either value of the error flag */
+
+void binlog_checkpoint_callback(void *cookie);
class binlog_cache_mngr;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
@@ -401,11 +435,25 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
IO_CACHE *error_cache;
/* This is the `all' parameter for ha_commit_ordered(). */
bool all;
+ /*
+ True if we need to increment xid_count in trx_group_commit_leader() and
+ decrement in unlog() (this is needed if there is a participating engine
+ that does not implement the commit_checkpoint_request() handlerton
+ method).
+ */
+ bool need_unlog;
+ /*
+ Fields used to pass the necessary information to the last thread in a
+ group commit, only used when opt_optimize_thread_scheduling is not set.
+ */
+ bool check_purge;
+ ulong binlog_id;
};
/*
A list of struct xid_count_per_binlog is used to keep track of how many
- XIDs are in prepared, but not committed, state in each binlog.
+ XIDs are in prepared, but not committed, state in each binlog. And how
+ many commit_checkpoint_request()'s are pending.
When count drops to zero in a binlog after rotation, it means that there
are no more XIDs in prepared state, so that binlog is no longer needed
@@ -418,10 +466,10 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
char *binlog_name;
uint binlog_name_len;
ulong binlog_id;
+ /* Total prepared XIDs and pending checkpoint requests in this binlog. */
long xid_count;
xid_count_per_binlog(); /* Give link error if constructor used. */
};
- ulong current_binlog_id;
I_List<xid_count_per_binlog> binlog_xid_count_list;
/*
When this is set, a RESET MASTER is in progress.
@@ -432,6 +480,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
checkpoint arrives - when all have arrived, RESET MASTER will complete.
*/
bool reset_master_pending;
+ friend void binlog_checkpoint_callback(void *cookie);
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
mysql_mutex_t LOCK_index;
@@ -464,15 +513,6 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
uint file_id;
uint open_count; // For replication
int readers_count;
- bool need_start_event;
- /*
- no_auto_events means we don't want any of these automatic events :
- Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't
- want a Rotate_log event to be written to the relay log. When we start a
- relay log etc. So in 4.x this is 1 for relay logs, 0 for binlogs.
- In 5.0 it's 0 for relay logs too!
- */
- bool no_auto_events;
/* Queue of transactions queued up to participate in group commit. */
group_commit_entry *group_commit_queue;
/*
@@ -508,10 +548,12 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
*/
int new_file_without_locking();
int new_file_impl(bool need_lock);
+ void do_checkpoint_request(ulong binlog_id);
+ void purge();
int write_transaction_or_stmt(group_commit_entry *entry);
bool write_transaction_to_binlog_events(group_commit_entry *entry);
void trx_group_commit_leader(group_commit_entry *leader);
- void mark_xid_done(ulong cookie);
+ void mark_xid_done(ulong cookie, bool write_checkpoint);
void mark_xids_active(ulong cookie, uint xid_count);
public:
@@ -572,6 +614,7 @@ public:
*/
char last_commit_pos_file[FN_REFLEN];
my_off_t last_commit_pos_offset;
+ ulong current_binlog_id;
MYSQL_BIN_LOG(uint *sync_period);
/*
@@ -600,6 +643,7 @@ public:
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
+ void commit_checkpoint_notify(void *cookie);
int recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log,
Format_description_log_event *fdle);
#if !defined(MYSQL_CLIENT)
@@ -629,15 +673,14 @@ public:
void signal_update();
void wait_for_update_relay_log(THD* thd);
int wait_for_update_bin_log(THD* thd, const struct timespec * timeout);
- void set_need_start_event() { need_start_event = 1; }
- void init(bool no_auto_events_arg, ulong max_size);
+ void init(ulong max_size);
void init_pthread_objects();
void cleanup();
bool open(const char *log_name,
enum_log_type log_type,
const char *new_name,
enum cache_type io_cache_type_arg,
- bool no_auto_events_arg, ulong max_size,
+ ulong max_size,
bool null_created,
bool need_mutex);
bool open_index_file(const char *index_file_name_arg,
@@ -674,7 +717,7 @@ public:
bool can_purge_log(const char *log_file_name);
int update_log_index(LOG_INFO* linfo, bool need_update_threads);
int rotate(bool force_rotate, bool* check_purge);
- void purge();
+ void checkpoint_and_purge(ulong binlog_id);
int rotate_and_purge(bool force_rotate);
/**
Flush binlog cache and synchronize to disk.
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 6cefdb4fa59..25ab7b7c870 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -5874,7 +5874,7 @@ Rotate_log_event::do_shall_skip(Relay_log_info *rli)
**************************************************************************/
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
-void Binlog_checkpoint_log_event::pack_info(Protocol *protocol)
+void Binlog_checkpoint_log_event::pack_info(THD *thd, Protocol *protocol)
{
protocol->store(binlog_file_name, binlog_file_len, &my_charset_bin);
}
diff --git a/sql/log_event.h b/sql/log_event.h
index c76e538618b..dfbefdb359e 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -2911,7 +2911,7 @@ public:
Binlog_checkpoint_log_event(const char *binlog_file_name_arg,
uint binlog_file_len_arg);
#ifdef HAVE_REPLICATION
- void pack_info(Protocol *protocol);
+ void pack_info(THD *thd, Protocol *protocol);
#endif
#else
void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 3c0f209235a..fa6b085c3ad 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -715,7 +715,8 @@ char **orig_argv;
#ifdef HAVE_PSI_INTERFACE
#ifdef HAVE_MMAP
-PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool;
+PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool,
+ key_LOCK_pending_checkpoint;
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL
@@ -756,6 +757,7 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOCK_sync, "TC_LOG_MMAP::LOCK_sync", 0},
{ &key_LOCK_active, "TC_LOG_MMAP::LOCK_active", 0},
{ &key_LOCK_pool, "TC_LOG_MMAP::LOCK_pool", 0},
+ { &key_LOCK_pending_checkpoint, "TC_LOG_MMAP::LOCK_pending_checkpoint", 0},
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL
@@ -4418,7 +4420,7 @@ a file name for --log-bin-index option", opt_binlog_index_name);
}
if (opt_bin_log && mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0,
- WRITE_CACHE, 0, max_binlog_size, 0, TRUE))
+ WRITE_CACHE, max_binlog_size, 0, TRUE))
unireg_abort(1);
#ifdef HAVE_REPLICATION
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 28c4d771a48..62d2426d692 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -218,7 +218,7 @@ extern pthread_key(MEM_ROOT**,THR_MALLOC);
#ifdef HAVE_PSI_INTERFACE
#ifdef HAVE_MMAP
extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active,
- key_LOCK_pool;
+ key_LOCK_pool, key_LOCK_pending_checkpoint;
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 252b4f3f5b9..a700aff0da9 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -213,7 +213,7 @@ a file name for --relay-log-index option", opt_relaylog_index_name);
but a destructor will take care of that
*/
if (rli->relay_log.open_index_file(opt_relaylog_index_name, ln, TRUE) ||
- rli->relay_log.open(ln, LOG_BIN, 0, SEQ_READ_APPEND, 0,
+ rli->relay_log.open(ln, LOG_BIN, 0, SEQ_READ_APPEND,
(max_relay_log_size ? max_relay_log_size :
max_binlog_size), 1, TRUE))
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 8869ccb7004..7b7bddecd17 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1757,13 +1757,12 @@ past_checksum:
/* Announce MariaDB slave capabilities. */
DBUG_EXECUTE_IF("simulate_slave_capability_none", goto after_set_capability;);
{
- const char *q=
- DBUG_EVALUATE_IF("simulate_slave_capability_old_53",
- "SET @mariadb_slave_capability="
- STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE),
- "SET @mariadb_slave_capability="
- STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE));
- if (mysql_real_query(mysql, q, strlen(q)))
+ int rc= DBUG_EVALUATE_IF("simulate_slave_capability_old_53",
+ mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
+ STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE))),
+ mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
+ STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE))));
+ if (rc)
{
err_code= mysql_errno(mysql);
if (is_network_error(err_code))
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 054f267ed01..4943a39f491 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -149,9 +149,6 @@ public:
};
-#define TC_LOG_PAGE_SIZE 8192
-#define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE)
-
#define TC_HEURISTIC_RECOVER_COMMIT 1
#define TC_HEURISTIC_RECOVER_ROLLBACK 2
extern ulong tc_heuristic_recover;
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 81bb4a1c3eb..71fcacbd0e8 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -347,6 +347,7 @@ static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
static int innobase_release_savepoint(handlerton *hton, THD* thd,
void *savepoint);
+static void innobase_checkpoint_request(handlerton *hton, void *cookie);
static handler *innobase_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root);
@@ -2250,6 +2251,7 @@ innobase_init(
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
+ innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
@@ -3007,6 +3009,19 @@ innobase_rollback_trx(
}
/*****************************************************************//**
+Handle a commit checkpoint request from server layer.
+We simply flush the redo log immediately and do the notify call.*/
+static
+void
+innobase_checkpoint_request(
+ handlerton *hton, /*!< in: handlerton, passed on to the notify call */
+ void *cookie) /*!< in: opaque value from the server layer, passed on unchanged */
+{
+ log_buffer_flush_to_disk(); /* flush the redo log before notifying */
+ commit_checkpoint_notify_ha(hton, cookie); /* hand the cookie back to the server layer */
+}
+
+/*****************************************************************//**
Rolls back a transaction to a savepoint.
@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
given name */
@@ -11460,10 +11475,17 @@ static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max,
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
PLUGIN_VAR_OPCMDARG,
- "Set to 0 (write and flush once per second),"
- " 1 (write and flush at each commit)"
- " or 2 (write at commit, flush once per second).",
- NULL, NULL, 1, 0, 2, 0);
+ "Controls the durability/speed trade-off for commits."
+ " Set to 0 (write and flush redo log to disk only once per second),"
+ " 1 (flush to disk at each commit),"
+ " 2 (write to log at commit but flush to disk only once per second)"
+ " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
+ " 1 and 3 guarantees that after a crash, committed transactions will"
+ " not be lost and will be consistent with the binlog and other transactional"
+ " engines. 2 can get inconsistent and lose transactions if there is a"
+ " power failure or kernel crash but not if mysqld crashes. 0 has no"
+ " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
+ NULL, NULL, 1, 0, 3, 0);
static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 85246aa6d1f..78191c4c1b4 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -1025,7 +1025,8 @@ trx_commit_off_kernel(
trx->must_flush_log_later = TRUE;
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
+ } else if (srv_flush_log_at_trx_commit == 1 ||
+ srv_flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@@ -1712,7 +1713,11 @@ trx_commit_complete_for_mysql(
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
+ } else if (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
+ /* Do nothing - we already flushed the prepare and binlog write
+ to disk, so transaction is durable (will be recovered from
+ binlog if necessary) */
+ } else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@@ -1992,7 +1997,7 @@ trx_prepare_off_kernel(
if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1) {
+ } else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index db0bd40c9b6..e4da37286bb 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -383,6 +383,7 @@ static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
static int innobase_release_savepoint(handlerton *hton, THD* thd,
void *savepoint);
+static void innobase_checkpoint_request(handlerton *hton, void *cookie);
static handler *innobase_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root);
@@ -483,10 +484,17 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
- "Set to 0 (write and flush once per second),"
- " 1 (write and flush at each commit)"
- " or 2 (write at commit, flush once per second).",
- NULL, NULL, 1, 0, 2, 0);
+ "Controls the durability/speed trade-off for commits."
+ " Set to 0 (write and flush redo log to disk only once per second),"
+ " 1 (flush to disk at each commit),"
+ " 2 (write to log at commit but flush to disk only once per second)"
+ " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
+ " 1 and 3 guarantees that after a crash, committed transactions will"
+ " not be lost and will be consistent with the binlog and other transactional"
+ " engines. 2 can get inconsistent and lose transactions if there is a"
+ " power failure or kernel crash but not if mysqld crashes. 0 has no"
+ " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
+ NULL, NULL, 1, 0, 3, 0);
static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
"In the transaction after enabled, UPDATE, INSERT and DELETE only move the cursor to the records "
@@ -2469,6 +2477,7 @@ innobase_init(
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
+ innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
innobase_hton->checkpoint_state= innobase_checkpoint_state;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
@@ -3492,6 +3501,19 @@ innobase_rollback_trx(
}
/*****************************************************************//**
+Handle a commit checkpoint request from server layer.
+We simply flush the redo log immediately and do the notify call.*/
+static
+void
+innobase_checkpoint_request(
+ handlerton *hton, /*!< in: handlerton, passed on to the notify call */
+ void *cookie) /*!< in: opaque value from the server layer, passed on unchanged */
+{
+ log_buffer_flush_to_disk(); /* flush the redo log before notifying */
+ commit_checkpoint_notify_ha(hton, cookie); /* hand the cookie back to the server layer */
+}
+
+/*****************************************************************//**
Rolls back a transaction to a savepoint.
@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
given name */
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index eded5c303fa..a03f7aceafa 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -494,7 +494,6 @@ struct trx_struct{
this is set to 1 then registered should
also be set to 1. This is used in the
XA code */
- unsigned called_commit_ordered:1;/* 1 if innobase_commit_ordered has run. */
/*------------------------------*/
ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
ulint check_foreigns; /* normally TRUE, but if the user
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index c9fccaad16b..b703a04b1b0 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -1099,7 +1099,8 @@ trx_commit_off_kernel(
trx->must_flush_log_later = TRUE;
} else if (flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (flush_log_at_trx_commit == 1) {
+ } else if (flush_log_at_trx_commit == 1 ||
+ flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@@ -1809,7 +1810,11 @@ trx_commit_complete_for_mysql(
/* Do nothing */
} else if (flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (flush_log_at_trx_commit == 1) {
+ } else if (flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
+ /* Do nothing - we already flushed the prepare and binlog write
+ to disk, so transaction is durable (will be recovered from
+ binlog if necessary) */
+ } else if (flush_log_at_trx_commit == 1 || flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@@ -2097,7 +2102,7 @@ trx_prepare_off_kernel(
if (flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (flush_log_at_trx_commit == 1) {
+ } else if (flush_log_at_trx_commit == 1 || flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */