summaryrefslogtreecommitdiff
path: root/sql/slave.cc
diff options
context:
space:
mode:
authorAndrei <andrei.elkin@mariadb.com>2022-04-14 18:59:24 +0300
committerAndrei <andrei.elkin@mariadb.com>2022-04-25 16:00:35 +0300
commit1bcdc3e9eb1a393fd07403c874ac7d7edc6d8a0a (patch)
tree0244016b4f5522ff53c905eaee3154cec715f1d8 /sql/slave.cc
parent907e4c62cec951646e9049dffc41a0a6eeeb821d (diff)
downloadmariadb-git-1bcdc3e9eb1a393fd07403c874ac7d7edc6d8a0a.tar.gz
MDEV-27697 slave must recognize incomplete replication event group
In cases of a faulty master or an incorrect binlog event producer, that slave is working with, sends an incomplete group of events slave must react with an error to not to log into the relay-log any new events that do not belong to the incomplete group. Fixed with extending received event properties check when slave connects to master in gtid mode. Specifically for the event that can be a part of a group its relay-logging is permitted only when its position within the group is validated. Otherwise slave IO thread stops with ER_SLAVE_RELAY_LOG_WRITE_FAILURE.
Diffstat (limited to 'sql/slave.cc')
-rw-r--r--sql/slave.cc164
1 files changed, 138 insertions, 26 deletions
diff --git a/sql/slave.cc b/sql/slave.cc
index 2ff1a0490e9..31e50753c9e 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6196,23 +6196,75 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
}
}
- if (unlikely(mi->gtid_reconnect_event_skip_count))
- {
- goto default_action;
- }
-
/*
- We have successfully queued to relay log everything before this GTID, so
+ Unless the previous group is malformed,
+ we have successfully queued to relay log everything before this GTID, so
in case of reconnect we can start from after any previous GTID.
- (Normally we would have updated gtid_current_pos earlier at the end of
- the previous event group, but better leave an extra check here for
- safety).
+ (We must have updated gtid_current_pos earlier at the end of
+ the previous event group. Unless ...)
*/
- if (mi->events_queued_since_last_gtid)
+ if (unlikely(mi->events_queued_since_last_gtid >
+ mi->gtid_reconnect_event_skip_count))
{
- mi->gtid_current_pos.update(&mi->last_queued_gtid);
- mi->events_queued_since_last_gtid= 0;
+ /*
+ ...unless the last group has not been completed. An assert below
+ can be satisfied only with the strict mode that ensures
+ against "genuine" gtid duplicates.
+ */
+ rpl_gtid *gtid_in_slave_state=
+ mi->gtid_current_pos.find(mi->last_queued_gtid.domain_id);
+
+ // Slave gtid state must not have updated yet to the last received gtid.
+ DBUG_ASSERT((mi->using_gtid == Master_info::USE_GTID_NO ||
+ !opt_gtid_strict_mode) ||
+ (!gtid_in_slave_state ||
+ !(*gtid_in_slave_state == mi->last_queued_gtid)));
+
+ DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
+ {
+ /* Inject an event group that is missing its XID commit event. */
+ if (mi->last_queued_gtid.domain_id == 0 &&
+ mi->last_queued_gtid.seq_no == 1000)
+ {
+ sql_print_warning(
+ "Unexpected break of being relay-logged GTID %u-%u-%llu "
+ "event group by the current GTID event %u-%u-%llu",
+ PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid));
+ DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000");
+ goto dbug_gtid_accept;
+ }
+ });
+ error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+ sql_print_error("Unexpected break of being relay-logged GTID %u-%u-%llu "
+ "event group by the current GTID event %u-%u-%llu",
+ PARAM_GTID(mi->last_queued_gtid),PARAM_GTID(event_gtid));
+ goto err;
+ }
+ else if (unlikely(mi->gtid_reconnect_event_skip_count > 0))
+ {
+ if (mi->gtid_reconnect_event_skip_count ==
+ mi->events_queued_since_last_gtid)
+ {
+ DBUG_ASSERT(event_gtid == mi->last_queued_gtid);
+
+ goto default_action;
+ }
+
+ DBUG_ASSERT(0);
}
+ // else_likely{...
+#ifndef DBUG_OFF
+dbug_gtid_accept:
+ DBUG_EXECUTE_IF("slave_discard_gtid_0_x_1002",
+ {
+ if (mi->last_queued_gtid.server_id == 27697 &&
+ mi->last_queued_gtid.seq_no == 1002)
+ {
+ DBUG_SET("-d,slave_discard_gtid_0_x_1002");
+ goto skip_relay_logging;
+ }
+ });
+#endif
mi->last_queued_gtid= event_gtid;
mi->last_queued_gtid_standalone=
(gtid_flag & Gtid_log_event::FL_STANDALONE) != 0;
@@ -6222,6 +6274,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
++mi->events_queued_since_last_gtid;
inc_pos= event_len;
+ // ...} eof else_likely
}
break;
/*
@@ -6274,6 +6327,12 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
case XID_EVENT:
DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
{
+ if (mi->last_queued_gtid.server_id == 27697 &&
+ mi->last_queued_gtid.seq_no == 1000)
+ {
+ DBUG_SET("-d,slave_discard_xid_for_gtid_0_x_1000");
+ goto skip_relay_logging;
+ }
/* Inject an event group that is missing its XID commit event. */
if (mi->last_queued_gtid.domain_id == 0 &&
mi->last_queued_gtid.seq_no == 1000)
@@ -6319,15 +6378,48 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
}
};);
- if (mi->using_gtid != Master_info::USE_GTID_NO && mi->gtid_event_seen)
+ if (mi->using_gtid != Master_info::USE_GTID_NO)
{
- if (unlikely(mi->gtid_reconnect_event_skip_count))
+ if (likely(mi->gtid_event_seen))
{
- --mi->gtid_reconnect_event_skip_count;
- gtid_skip_enqueue= true;
+ if (unlikely(mi->gtid_reconnect_event_skip_count))
+ {
+ if (!got_gtid_event &&
+ mi->gtid_reconnect_event_skip_count ==
+ mi->events_queued_since_last_gtid)
+ goto gtid_not_start; // the 1st re-sent must be gtid
+
+ --mi->gtid_reconnect_event_skip_count;
+ gtid_skip_enqueue= true;
+ }
+ else if (likely(mi->events_queued_since_last_gtid))
+ {
+ DBUG_ASSERT(!got_gtid_event);
+
+ ++mi->events_queued_since_last_gtid;
+ }
+ else if (Log_event::is_group_event((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]))
+ {
+ goto gtid_not_start; // no first gtid event in this group
+ }
+ }
+ else if (Log_event::is_group_event((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]))
+ {
+ gtid_not_start:
+
+ DBUG_ASSERT(!got_gtid_event);
+
+ error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+ sql_print_error("The current group of events starts with "
+ "a non-GTID %s event; "
+ "the last seen GTID is %u-%u-%llu",
+ Log_event::get_type_str((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]),
+ mi->last_queued_gtid);
+ goto err;
}
- else if (mi->events_queued_since_last_gtid)
- ++mi->events_queued_since_last_gtid;
}
if (!is_compress_event)
@@ -6500,15 +6592,35 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
Query_log_event::peek_is_commit_rollback(buf, event_len,
checksum_alg))))))
{
- /*
- The whole of the current event group is queued. So in case of
- reconnect we can start from after the current GTID.
- */
- mi->gtid_current_pos.update(&mi->last_queued_gtid);
- mi->events_queued_since_last_gtid= 0;
+ DBUG_ASSERT(mi->events_queued_since_last_gtid > 1);
- /* Reset the domain_id_filter flag. */
- mi->domain_id_filter.reset_filter();
+ if (unlikely(gtid_skip_enqueue))
+ {
+ error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
+ sql_print_error("Recieved a group closing %s event "
+ "at %llu position in the group while there are "
+ "still %llu events to skip upon reconnecting; "
+ "the last seen GTID is %u-%u-%llu",
+ Log_event::get_type_str((Log_event_type) (uchar)
+ buf[EVENT_TYPE_OFFSET]),
+ (mi->events_queued_since_last_gtid -
+ mi->gtid_reconnect_event_skip_count),
+ mi->events_queued_since_last_gtid,
+ mi->last_queued_gtid);
+ goto err;
+ }
+ else
+ {
+ /*
+ The whole of the current event group is queued. So in case of
+ reconnect we can start from after the current GTID.
+ */
+ mi->gtid_current_pos.update(&mi->last_queued_gtid);
+ mi->events_queued_since_last_gtid= 0;
+
+ /* Reset the domain_id_filter flag. */
+ mi->domain_id_filter.reset_filter();
+ }
}
skip_relay_logging: