summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/extra/rpl_tests/rpl_parallel.inc1
-rw-r--r--mysql-test/suite/rpl/r/rpl_gtid_grouping.result54
-rw-r--r--mysql-test/suite/rpl/r/rpl_parallel.result1
-rw-r--r--mysql-test/suite/rpl/t/rpl_gtid_grouping.test97
-rw-r--r--sql/rpl_gtid.h1
-rw-r--r--sql/slave.cc164
6 files changed, 292 insertions, 26 deletions
diff --git a/mysql-test/extra/rpl_tests/rpl_parallel.inc b/mysql-test/extra/rpl_tests/rpl_parallel.inc
index b88d2126d4d..9ba7a30f2eb 100644
--- a/mysql-test/extra/rpl_tests/rpl_parallel.inc
+++ b/mysql-test/extra/rpl_tests/rpl_parallel.inc
@@ -1872,6 +1872,7 @@ SET GLOBAL slave_parallel_threads=10;
SET GLOBAL slave_parallel_threads=1;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000";
+CALL mtr.add_suppression("Unexpected break of being relay-logged GTID");
--connection server_1
INSERT INTO t2 VALUES (101);
diff --git a/mysql-test/suite/rpl/r/rpl_gtid_grouping.result b/mysql-test/suite/rpl/r/rpl_gtid_grouping.result
new file mode 100644
index 00000000000..ad7d6116c49
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_gtid_grouping.result
@@ -0,0 +1,54 @@
+include/master-slave.inc
+[connection master]
+connection slave;
+call mtr.add_suppression("Unexpected break of being relay-logged GTID 0-27697-1000");
+call mtr.add_suppression("Relay log write failure: could not queue event from master");
+call mtr.add_suppression("The current group of events starts with a non-GTID");
+include/stop_slave.inc
+CHANGE MASTER TO MASTER_USE_GTID=slave_pos;
+include/start_slave.inc
+connection master;
+CREATE TABLE t (a INT) ENGINE=innodb;
+INSERT INTO t VALUES(1);
+### A. Simulate an unnoticeable loss of Xid event
+connection slave;
+SET @@global.debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000";
+connection master;
+SET @@gtid_seq_no=1000;
+set @@server_id=27697;
+INSERT INTO t VALUES(1000);
+set @@server_id=default;
+INSERT INTO t VALUES(1001);
+## Prove the error occurs.
+connection slave;
+include/wait_for_slave_io_error.inc [errno=1595]
+## Prove the slave recovers after the simulation condtion is lifted.
+SET @@global.debug_dbug=default;
+include/start_slave.inc
+### B. Do the same to GTID event.
+connection slave;
+SET @@global.debug_dbug="+d,slave_discard_gtid_0_x_1002";
+connection master;
+SET @@gtid_seq_no=1002;
+set @@server_id=27697;
+INSERT INTO t VALUES(1002);
+set @@server_id=default;
+INSERT INTO t VALUES(1003);
+## Prove the error occurs.
+connection slave;
+include/wait_for_slave_io_error.inc [errno=1595]
+## Prove the slave recovers after the simulation condtion is lifted.
+SET @@global.debug_dbug=default;
+include/start_slave.inc
+connection master;
+connection slave;
+include/diff_tables.inc [master:t,slave:t]
+"===== Clean up ====="
+connection slave;
+include/stop_slave.inc
+CHANGE MASTER TO MASTER_USE_GTID=no;
+include/start_slave.inc
+connection master;
+DROP TABLE t;
+SET GLOBAL LOG_WARNINGS=default;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result
index 657b3ba7448..2601b30279e 100644
--- a/mysql-test/suite/rpl/r/rpl_parallel.result
+++ b/mysql-test/suite/rpl/r/rpl_parallel.result
@@ -1378,6 +1378,7 @@ include/stop_slave.inc
SET GLOBAL slave_parallel_threads=1;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000";
+CALL mtr.add_suppression("Unexpected break of being relay-logged GTID");
connection server_1;
INSERT INTO t2 VALUES (101);
INSERT INTO t2 VALUES (102);
diff --git a/mysql-test/suite/rpl/t/rpl_gtid_grouping.test b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test
new file mode 100644
index 00000000000..66448c4f96c
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test
@@ -0,0 +1,97 @@
+# ==== Purpose ====
+#
+# Verifies that the slave correctly recognizes replicated transaction
+# (event group) boundaries at the time events are received from the master.
+#
+# ==== Implementation ====
+#
+# A. Simulate an unnoticeable loss of Xid event to observe a slave error,
+# then restart slave to recover from the failure.
+# B. Do the same to GTID event.
+#
+# ==== References ====
+#
+# MDEV-27697 slave must recognize incomplete replication event group
+#
+--source include/have_binlog_format_mixed.inc
+--source include/have_innodb.inc
+--source include/have_debug.inc
+--source include/master-slave.inc
+
+--connection slave
+call mtr.add_suppression("Unexpected break of being relay-logged GTID 0-27697-1000");
+call mtr.add_suppression("Relay log write failure: could not queue event from master");
+call mtr.add_suppression("The current group of events starts with a non-GTID");
+
+--source include/stop_slave.inc
+CHANGE MASTER TO MASTER_USE_GTID=slave_pos;
+--source include/start_slave.inc
+
+--connection master
+CREATE TABLE t (a INT) ENGINE=innodb;
+INSERT INTO t VALUES(1);
+save_master_pos;
+
+--echo ### A. Simulate an unnoticeable loss of Xid event
+--sync_slave_with_master
+SET @@global.debug_dbug="+d,slave_discard_xid_for_gtid_0_x_1000";
+
+--connection master
+SET @@gtid_seq_no=1000;
+set @@server_id=27697;
+INSERT INTO t VALUES(1000);
+set @@server_id=default;
+INSERT INTO t VALUES(1001);
+
+--echo ## Prove the error occurs.
+--connection slave
+# ER_SLAVE_RELAY_LOG_WRITE_FAILURE
+--let $slave_io_errno = 1595
+--source include/wait_for_slave_io_error.inc
+## EOP
+
+--echo ## Prove the slave recovers after the simulation condtion is lifted.
+SET @@global.debug_dbug=default;
+--source include/start_slave.inc
+
+--echo ### B. Do the same to GTID event.
+--connection slave
+SET @@global.debug_dbug="+d,slave_discard_gtid_0_x_1002";
+
+--connection master
+SET @@gtid_seq_no=1002;
+set @@server_id=27697;
+INSERT INTO t VALUES(1002);
+set @@server_id=default;
+INSERT INTO t VALUES(1003);
+
+--echo ## Prove the error occurs.
+--connection slave
+# ER_SLAVE_RELAY_LOG_WRITE_FAILURE
+--let $slave_io_errno = 1595
+--source include/wait_for_slave_io_error.inc
+## EOP
+
+--echo ## Prove the slave recovers after the simulation condtion is lifted.
+SET @@global.debug_dbug=default;
+--source include/start_slave.inc
+
+--connection master
+save_master_pos;
+
+--sync_slave_with_master
+## EOP
+
+--let $diff_tables=master:t,slave:t
+--source include/diff_tables.inc
+
+--echo "===== Clean up ====="
+--connection slave
+--source include/stop_slave.inc
+CHANGE MASTER TO MASTER_USE_GTID=no;
+--source include/start_slave.inc
+
+--connection master
+DROP TABLE t;
+SET GLOBAL LOG_WARNINGS=default;
+--source include/rpl_end.inc
diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h
index e4949ff0d39..bb171f3d95d 100644
--- a/sql/rpl_gtid.h
+++ b/sql/rpl_gtid.h
@@ -26,6 +26,7 @@
extern const LEX_STRING rpl_gtid_slave_state_table_name;
class String;
+#define PARAM_GTID(G) (G).domain_id, (G).server_id, (G).seq_no /* three args for a "%u-%u-%llu" format; G is parenthesized so e.g. *ptr works */
struct rpl_gtid
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 2ff1a0490e9..31e50753c9e 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6196,23 +6196,75 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
}
}
- if (unlikely(mi->gtid_reconnect_event_skip_count))
- {
- goto default_action;
- }
-
/*
- We have successfully queued to relay log everything before this GTID, so
+ Unless the previous group is malformed,
+ we have successfully queued to relay log everything before this GTID, so
in case of reconnect we can start from after any previous GTID.
- (Normally we would have updated gtid_current_pos earlier at the end of
- the previous event group, but better leave an extra check here for
- safety).
+ (We must have updated gtid_current_pos earlier at the end of
+ the previous event group. Unless ...)
*/
- if (mi->events_queued_since_last_gtid)
+ if (unlikely(mi->events_queued_since_last_gtid >
+ mi->gtid_reconnect_event_skip_count))
{
- mi->gtid_current_pos.update(&mi->last_queued_gtid);
- mi->events_queued_since_last_gtid= 0;
+ /*
+ ...unless the last group has not been completed. The assert below
+ is only meaningful with GTID replication in strict mode, which
+ guards against "genuine" gtid duplicates.
+ */
+ rpl_gtid *gtid_in_slave_state=
+ mi->gtid_current_pos.find(mi->last_queued_gtid.domain_id);
+
+ // Slave gtid state must not have updated yet to the last received gtid.
+ DBUG_ASSERT((mi->using_gtid == Master_info::USE_GTID_NO ||
+ !opt_gtid_strict_mode) ||
+ (!gtid_in_slave_state ||
+ !(*gtid_in_slave_state == mi->last_queued_gtid)));
+
+ DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
+ {
+ /* Inject an event group that is missing its XID commit event. */
+ if (mi->last_queued_gtid.domain_id == 0 &&
+ mi->last_queued_gtid.seq_no == 1000)
+ {
+ sql_print_warning(
+ "Unexpected break of being relay-logged GTID %u-%u-%llu "
+ "event group by the current GTID event %u-%u-%llu",
+ PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid));
+ DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000");
+ goto dbug_gtid_accept;
+ }
+ });
+ error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+ sql_print_error("Unexpected break of being relay-logged GTID %u-%u-%llu "
+ "event group by the current GTID event %u-%u-%llu",
+ PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid));
+ goto err;
+ }
+ else if (unlikely(mi->gtid_reconnect_event_skip_count > 0))
+ {
+ if (mi->gtid_reconnect_event_skip_count ==
+ mi->events_queued_since_last_gtid)
+ {
+ DBUG_ASSERT(event_gtid == mi->last_queued_gtid);
+
+ goto default_action;
+ }
+
+ DBUG_ASSERT(0);
}
+ // else_likely{...
+#ifndef DBUG_OFF
+dbug_gtid_accept:
+ DBUG_EXECUTE_IF("slave_discard_gtid_0_x_1002",
+ {
+ if (mi->last_queued_gtid.server_id == 27697 &&
+ mi->last_queued_gtid.seq_no == 1002)
+ {
+ DBUG_SET("-d,slave_discard_gtid_0_x_1002");
+ goto skip_relay_logging;
+ }
+ });
+#endif
mi->last_queued_gtid= event_gtid;
mi->last_queued_gtid_standalone=
(gtid_flag & Gtid_log_event::FL_STANDALONE) != 0;
@@ -6222,6 +6274,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
++mi->events_queued_since_last_gtid;
inc_pos= event_len;
+ // ...} eof else_likely
}
break;
/*
@@ -6274,6 +6327,12 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
case XID_EVENT:
DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
{
+ if (mi->last_queued_gtid.server_id == 27697 &&
+ mi->last_queued_gtid.seq_no == 1000)
+ {
+ DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000");
+ goto skip_relay_logging;
+ }
/* Inject an event group that is missing its XID commit event. */
if (mi->last_queued_gtid.domain_id == 0 &&
mi->last_queued_gtid.seq_no == 1000)
@@ -6319,15 +6378,48 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
}
};);
- if (mi->using_gtid != Master_info::USE_GTID_NO && mi->gtid_event_seen)
+ if (mi->using_gtid != Master_info::USE_GTID_NO)
{
- if (unlikely(mi->gtid_reconnect_event_skip_count))
+ if (likely(mi->gtid_event_seen))
{
- --mi->gtid_reconnect_event_skip_count;
- gtid_skip_enqueue= true;
+ if (unlikely(mi->gtid_reconnect_event_skip_count))
+ {
+ if (!got_gtid_event &&
+ mi->gtid_reconnect_event_skip_count ==
+ mi->events_queued_since_last_gtid)
+ goto gtid_not_start; // the 1st re-sent must be gtid
+
+ --mi->gtid_reconnect_event_skip_count;
+ gtid_skip_enqueue= true;
+ }
+ else if (likely(mi->events_queued_since_last_gtid))
+ {
+ DBUG_ASSERT(!got_gtid_event);
+
+ ++mi->events_queued_since_last_gtid;
+ }
+ else if (Log_event::is_group_event((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]))
+ {
+ goto gtid_not_start; // no first gtid event in this group
+ }
+ }
+ else if (Log_event::is_group_event((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]))
+ {
+ gtid_not_start:
+
+ DBUG_ASSERT(!got_gtid_event);
+
+ error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+ sql_print_error("The current group of events starts with "
+ "a non-GTID %s event; "
+ "the last seen GTID is %u-%u-%llu",
+ Log_event::get_type_str((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]),
+ PARAM_GTID(mi->last_queued_gtid)); // pass the three scalars the format expects, not the struct
+ goto err;
}
- else if (mi->events_queued_since_last_gtid)
- ++mi->events_queued_since_last_gtid;
}
if (!is_compress_event)
@@ -6500,15 +6592,35 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
Query_log_event::peek_is_commit_rollback(buf, event_len,
checksum_alg))))))
{
- /*
- The whole of the current event group is queued. So in case of
- reconnect we can start from after the current GTID.
- */
- mi->gtid_current_pos.update(&mi->last_queued_gtid);
- mi->events_queued_since_last_gtid= 0;
+ DBUG_ASSERT(mi->events_queued_since_last_gtid > 1);
- /* Reset the domain_id_filter flag. */
- mi->domain_id_filter.reset_filter();
+ if (unlikely(gtid_skip_enqueue))
+ {
+ error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+ sql_print_error("Recieved a group closing %s event "
+ "at %llu position in the group while there are "
+ "still %llu events to skip upon reconnecting; "
+ "the last seen GTID is %u-%u-%llu",
+ Log_event::get_type_str((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]),
+ (mi->events_queued_since_last_gtid -
+ mi->gtid_reconnect_event_skip_count),
+ mi->events_queued_since_last_gtid,
+ PARAM_GTID(mi->last_queued_gtid)); // pass the three scalars the format expects, not the struct
+ goto err;
+ }
+ else
+ {
+ /*
+ The whole of the current event group is queued. So in case of
+ reconnect we can start from after the current GTID.
+ */
+ mi->gtid_current_pos.update(&mi->last_queued_gtid);
+ mi->events_queued_since_last_gtid= 0;
+
+ /* Reset the domain_id_filter flag. */
+ mi->domain_id_filter.reset_filter();
+ }
}
skip_relay_logging: