diff options
-rw-r--r-- | mysql-test/extra/rpl_tests/rpl_parallel.inc | 1 | ||||
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_gtid_grouping.result | 54 | ||||
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_parallel.result | 1 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_gtid_grouping.test | 97 | ||||
-rw-r--r-- | sql/rpl_gtid.h | 1 | ||||
-rw-r--r-- | sql/slave.cc | 164 |
6 files changed, 292 insertions, 26 deletions
diff --git a/mysql-test/extra/rpl_tests/rpl_parallel.inc b/mysql-test/extra/rpl_tests/rpl_parallel.inc index b88d2126d4d..9ba7a30f2eb 100644 --- a/mysql-test/extra/rpl_tests/rpl_parallel.inc +++ b/mysql-test/extra/rpl_tests/rpl_parallel.inc @@ -1872,6 +1872,7 @@ SET GLOBAL slave_parallel_threads=10; SET GLOBAL slave_parallel_threads=1; SET @old_dbug= @@GLOBAL.debug_dbug; SET GLOBAL debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000"; +CALL mtr.add_suppression("Unexpected break of being relay-logged GTID"); --connection server_1 INSERT INTO t2 VALUES (101); diff --git a/mysql-test/suite/rpl/r/rpl_gtid_grouping.result b/mysql-test/suite/rpl/r/rpl_gtid_grouping.result new file mode 100644 index 00000000000..ad7d6116c49 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_gtid_grouping.result @@ -0,0 +1,54 @@ +include/master-slave.inc +[connection master] +connection slave; +call mtr.add_suppression("Unexpected break of being relay-logged GTID 0-27697-1000"); +call mtr.add_suppression("Relay log write failure: could not queue event from master"); +call mtr.add_suppression("The current group of events starts with a non-GTID"); +include/stop_slave.inc +CHANGE MASTER TO MASTER_USE_GTID=slave_pos; +include/start_slave.inc +connection master; +CREATE TABLE t (a INT) ENGINE=innodb; +INSERT INTO t VALUES(1); +### A. Simulate an unnoticeable loss of Xid event +connection slave; +SET @@global.debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000"; +connection master; +SET @@gtid_seq_no=1000; +set @@server_id=27697; +INSERT INTO t VALUES(1000); +set @@server_id=default; +INSERT INTO t VALUES(1001); +## Prove the error occurs. +connection slave; +include/wait_for_slave_io_error.inc [errno=1595] +## Prove the slave recovers after the simulation condtion is lifted. +SET @@global.debug_dbug=default; +include/start_slave.inc +### B. Do the same to GTID event. +connection slave; +SET @@global.debug_dbug="+d,slave_discard_gtid_0_x_1002"; +connection master; +SET @@gtid_seq_no=1002; +set @@server_id=27697; +INSERT INTO t VALUES(1002); +set @@server_id=default; +INSERT INTO t VALUES(1003); +## Prove the error occurs. +connection slave; +include/wait_for_slave_io_error.inc [errno=1595] +## Prove the slave recovers after the simulation condtion is lifted. +SET @@global.debug_dbug=default; +include/start_slave.inc +connection master; +connection slave; +include/diff_tables.inc [master:t,slave:t] +"===== Clean up =====" +connection slave; +include/stop_slave.inc +CHANGE MASTER TO MASTER_USE_GTID=no; +include/start_slave.inc +connection master; +DROP TABLE t; +SET GLOBAL LOG_WARNINGS=default; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 657b3ba7448..2601b30279e 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -1378,6 +1378,7 @@ include/stop_slave.inc SET GLOBAL slave_parallel_threads=1; SET @old_dbug= @@GLOBAL.debug_dbug; SET GLOBAL debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000"; +CALL mtr.add_suppression("Unexpected break of being relay-logged GTID"); connection server_1; INSERT INTO t2 VALUES (101); INSERT INTO t2 VALUES (102); diff --git a/mysql-test/suite/rpl/t/rpl_gtid_grouping.test b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test new file mode 100644 index 00000000000..66448c4f96c --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test @@ -0,0 +1,97 @@ +# ==== Purpose ==== +# +# Test verifies that replicated transaction boundaries are set properly +# at receiving from master time. +# +# ==== Implementation ==== +# +# A. Simulate an unnoticeable loss of Xid event to observe a slave error, +# then restart slave to recover from the failure. +# B. Do the same to GTID event. +# +# ==== References ==== +# +# MDEV-27697 slave must recognize incomplete replication event group +# +--source include/have_binlog_format_mixed.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/master-slave.inc + +--connection slave +call mtr.add_suppression("Unexpected break of being relay-logged GTID 0-27697-1000"); +call mtr.add_suppression("Relay log write failure: could not queue event from master"); +call mtr.add_suppression("The current group of events starts with a non-GTID"); + +--source include/stop_slave.inc +CHANGE MASTER TO MASTER_USE_GTID=slave_pos; +--source include/start_slave.inc + +--connection master +CREATE TABLE t (a INT) ENGINE=innodb; +INSERT INTO t VALUES(1); +save_master_pos; + +--echo ### A. Simulate an unnoticeable loss of Xid event +--sync_slave_with_master +SET @@global.debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000"; + +--connection master +SET @@gtid_seq_no=1000; +set @@server_id=27697; +INSERT INTO t VALUES(1000); +set @@server_id=default; +INSERT INTO t VALUES(1001); + +--echo ## Prove the error occurs. +--connection slave +# ER_SLAVE_RELAY_LOG_WRITE_FAILURE +--let $slave_io_errno = 1595 +--source include/wait_for_slave_io_error.inc +## EOP + +--echo ## Prove the slave recovers after the simulation condtion is lifted. +SET @@global.debug_dbug=default; +--source include/start_slave.inc + +--echo ### B. Do the same to GTID event. +--connection slave +SET @@global.debug_dbug="+d,slave_discard_gtid_0_x_1002"; + +--connection master +SET @@gtid_seq_no=1002; +set @@server_id=27697; +INSERT INTO t VALUES(1002); +set @@server_id=default; +INSERT INTO t VALUES(1003); + +--echo ## Prove the error occurs. +--connection slave +# ER_SLAVE_RELAY_LOG_WRITE_FAILURE +--let $slave_io_errno = 1595 +--source include/wait_for_slave_io_error.inc +## EOP + +--echo ## Prove the slave recovers after the simulation condtion is lifted. +SET @@global.debug_dbug=default; +--source include/start_slave.inc + +--connection master +save_master_pos; + +--sync_slave_with_master +## EOP + +--let $diff_tables=master:t,slave:t +--source include/diff_tables.inc + +--echo "===== Clean up =====" +--connection slave +--source include/stop_slave.inc +CHANGE MASTER TO MASTER_USE_GTID=no; +--source include/start_slave.inc + +--connection master +DROP TABLE t; +SET GLOBAL LOG_WARNINGS=default; +--source include/rpl_end.inc diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index e4949ff0d39..bb171f3d95d 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -26,6 +26,7 @@ extern const LEX_STRING rpl_gtid_slave_state_table_name; class String; +#define PARAM_GTID(G) G.domain_id, G.server_id, G.seq_no struct rpl_gtid { diff --git a/sql/slave.cc b/sql/slave.cc index 2ff1a0490e9..31e50753c9e 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -6196,23 +6196,75 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) } } - if (unlikely(mi->gtid_reconnect_event_skip_count)) - { - goto default_action; - } - /* - We have successfully queued to relay log everything before this GTID, so + Unless the previous group is malformed, + we have successfully queued to relay log everything before this GTID, so in case of reconnect we can start from after any previous GTID. - (Normally we would have updated gtid_current_pos earlier at the end of - the previous event group, but better leave an extra check here for - safety). + (We must have updated gtid_current_pos earlier at the end of + the previous event group. Unless ...) */ - if (mi->events_queued_since_last_gtid) + if (unlikely(mi->events_queued_since_last_gtid > + mi->gtid_reconnect_event_skip_count)) { - mi->gtid_current_pos.update(&mi->last_queued_gtid); - mi->events_queued_since_last_gtid= 0; + /* + ...unless the last group has not been completed. An assert below + can be satisfied only with the strict mode that ensures + against "genuine" gtid duplicates. + */ + rpl_gtid *gtid_in_slave_state= + mi->gtid_current_pos.find(mi->last_queued_gtid.domain_id); + + // Slave gtid state must not have updated yet to the last received gtid. + DBUG_ASSERT((mi->using_gtid == Master_info::USE_GTID_NO || + !opt_gtid_strict_mode) || + (!gtid_in_slave_state || + !(*gtid_in_slave_state == mi->last_queued_gtid))); + + DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000", + { + /* Inject an event group that is missing its XID commit event. */ + if (mi->last_queued_gtid.domain_id == 0 && + mi->last_queued_gtid.seq_no == 1000) + { + sql_print_warning( + "Unexpected break of being relay-logged GTID %u-%u-%llu " + "event group by the current GTID event %u-%u-%llu", + PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid)); + DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000"); + goto dbug_gtid_accept; + } + }); + error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; + sql_print_error("Unexpected break of being relay-logged GTID %u-%u-%llu " + "event group by the current GTID event %u-%u-%llu", + PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid)); + goto err; + } + else if (unlikely(mi->gtid_reconnect_event_skip_count > 0)) + { + if (mi->gtid_reconnect_event_skip_count == + mi->events_queued_since_last_gtid) + { + DBUG_ASSERT(event_gtid == mi->last_queued_gtid); + + goto default_action; + } + + DBUG_ASSERT(0); } + // else_likely{... +#ifndef DBUG_OFF +dbug_gtid_accept: + DBUG_EXECUTE_IF("slave_discard_gtid_0_x_1002", + { + if (mi->last_queued_gtid.server_id == 27697 && + mi->last_queued_gtid.seq_no == 1002) + { + DBUG_SET("-d,slave_discard_gtid_0_x_1002"); + goto skip_relay_logging; + } + }); +#endif mi->last_queued_gtid= event_gtid; mi->last_queued_gtid_standalone= (gtid_flag & Gtid_log_event::FL_STANDALONE) != 0; @@ -6222,6 +6274,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) ++mi->events_queued_since_last_gtid; inc_pos= event_len; + // ...} eof else_likely } break; /* @@ -6274,6 +6327,12 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) case XID_EVENT: DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000", { + if (mi->last_queued_gtid.server_id == 27697 && + mi->last_queued_gtid.seq_no == 1000) + { + DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000"); + goto skip_relay_logging; + } /* Inject an event group that is missing its XID commit event. */ if (mi->last_queued_gtid.domain_id == 0 && mi->last_queued_gtid.seq_no == 1000) @@ -6319,15 +6378,48 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) } };); - if (mi->using_gtid != Master_info::USE_GTID_NO && mi->gtid_event_seen) + if (mi->using_gtid != Master_info::USE_GTID_NO) { - if (unlikely(mi->gtid_reconnect_event_skip_count)) + if (likely(mi->gtid_event_seen)) { - --mi->gtid_reconnect_event_skip_count; - gtid_skip_enqueue= true; + if (unlikely(mi->gtid_reconnect_event_skip_count)) + { + if (!got_gtid_event && + mi->gtid_reconnect_event_skip_count == + mi->events_queued_since_last_gtid) + goto gtid_not_start; // the 1st re-sent must be gtid + + --mi->gtid_reconnect_event_skip_count; + gtid_skip_enqueue= true; + } + else if (likely(mi->events_queued_since_last_gtid)) + { + DBUG_ASSERT(!got_gtid_event); + + ++mi->events_queued_since_last_gtid; + } + else if (Log_event::is_group_event((Log_event_type) (uchar) + buf[EVENT_TYPE_OFFSET])) + { + goto gtid_not_start; // no first gtid event in this group + } + } + else if (Log_event::is_group_event((Log_event_type) (uchar) + buf[EVENT_TYPE_OFFSET])) + { + gtid_not_start: + + DBUG_ASSERT(!got_gtid_event); + + error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; + sql_print_error("The current group of events starts with " + "a non-GTID %s event; " + "the last seen GTID is %u-%u-%llu", + Log_event::get_type_str((Log_event_type) (uchar) + buf[EVENT_TYPE_OFFSET]), + mi->last_queued_gtid); + goto err; } - else if (mi->events_queued_since_last_gtid) - ++mi->events_queued_since_last_gtid; } if (!is_compress_event) @@ -6500,15 +6592,35 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) Query_log_event::peek_is_commit_rollback(buf, event_len, checksum_alg)))))) { - /* - The whole of the current event group is queued. So in case of - reconnect we can start from after the current GTID. - */ - mi->gtid_current_pos.update(&mi->last_queued_gtid); - mi->events_queued_since_last_gtid= 0; + DBUG_ASSERT(mi->events_queued_since_last_gtid > 1); - /* Reset the domain_id_filter flag. */ - mi->domain_id_filter.reset_filter(); + if (unlikely(gtid_skip_enqueue)) + { + error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; + sql_print_error("Recieved a group closing %s event " + "at %llu position in the group while there are " + "still %llu events to skip upon reconnecting; " + "the last seen GTID is %u-%u-%llu", + Log_event::get_type_str((Log_event_type) (uchar) + buf[EVENT_TYPE_OFFSET]), + (mi->events_queued_since_last_gtid - + mi->gtid_reconnect_event_skip_count), + mi->events_queued_since_last_gtid, + mi->last_queued_gtid); + goto err; + } + else + { + /* + The whole of the current event group is queued. So in case of + reconnect we can start from after the current GTID. + */ + mi->gtid_current_pos.update(&mi->last_queued_gtid); + mi->events_queued_since_last_gtid= 0; + + /* Reset the domain_id_filter flag. */ + mi->domain_id_filter.reset_filter(); + } } skip_relay_logging: |