summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrandon Nesterenko <brandon.nesterenko@mariadb.com>2022-01-05 08:35:05 -0700
committerBrandon Nesterenko <brandon.nesterenko@mariadb.com>2022-01-05 08:40:20 -0700
commitddd79e29059dbb065c9f817382e4a0378171ba1d (patch)
treebf8045d13d634a1ef9266fe0ef01bbf83d0d9fb0
parenta48d2ec866751e9da76066bf3a30f99da9031ab0 (diff)
downloadmariadb-git-10.4-MDEV-16091-merge.tar.gz
MDEV-16091: Seconds_Behind_Master spikes to millions of seconds10.4-MDEV-16091-merge
Problem: ======== A slave’s relay log format description event is used when calculating Seconds_Behind_Master (SBM). This forces the SBM value to spike when processing these events, as their creation date is set to the timestamp that the IO thread begins. Solution: ======== When the slave generates a format description event, mark the event as a relay log event so it does not update the rli->last_master_timestamp variable. Reviewed By: ============ Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r--mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result41
-rw-r--r--mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test86
-rw-r--r--sql/log.cc1
-rw-r--r--sql/slave.cc9
4 files changed, 137 insertions, 0 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result
new file mode 100644
index 00000000000..d164ea5434f
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result
@@ -0,0 +1,41 @@
+include/master-slave.inc
+[connection master]
+connection slave;
+include/stop_slave.inc
+SET @save_dbug= @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
+include/start_slave.inc
+# Future events must be logged at least 2 seconds after
+# the slave starts
+connection master;
+# Write events to ensure slave will be consistent with master
+create table t1 (a int);
+insert into t1 values (1);
+# Flush logs on master forces slave to generate a Format description
+# event in its relay log
+flush logs;
+connection slave;
+# Ignore FDEs that happen before the CREATE/INSERT commands
+SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
+SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
+SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+# On the next FDE, the slave should have the master CREATE/INSERT events
+SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
+select count(*)=1 from t1;
+count(*)=1
+1
+# The relay log FDE has been processed - here we check to ensure it was
+# not considered in Seconds_Behind_Master calculation
+connection slave1;
+# Safely resume slave SQL thread
+SET @@global.debug_dbug='';
+SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
+SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+SET DEBUG_SYNC='RESET';
+connection master;
+DROP TABLE t1;
+connection slave;
+connection slave;
+SET @@global.debug_dbug=$save_dbug;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
new file mode 100644
index 00000000000..36ddbd0dc9f
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
@@ -0,0 +1,86 @@
+#
+# Purpose:
+# This test validates that a slave's relay log format description event is
+# not used to calculate the Seconds_Behind_Master time displayed by
+# SHOW SLAVE STATUS.
+#
+# Methodology:
+# Ensure that a slave's reported Seconds_Behind_Master does not point before
+# a time in which we can prove that it has progressed beyond. The slave's
+# relay log events are created using the timestamp at which the IO thread was
+# created. Therefore, after starting the slave's IO thread, we sleep so any
+# proceeding events are forced to have later timestamps. After sleeping, we run
+# MDL statements on the master and save the time at which they are binlogged.
+# Once the slave executes these MDL commands, we have proven that the slave has
+# caught up to this saved timestamp. At this point, if the value of
+# Seconds_Behind_Master points before the time in which the MDL events were
+# logged, it is invalid.
+#
+# References:
+# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
+#
+--source include/have_debug.inc
+--source include/have_innodb.inc
+--source include/master-slave.inc
+
+--connection slave
+--source include/stop_slave.inc
+SET @save_dbug= @@GLOBAL.debug_dbug;
+SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
+--source include/start_slave.inc
+
+--let $sleep_time=2
+--echo # Future events must be logged at least $sleep_time seconds after
+--echo # the slave starts
+--sleep $sleep_time
+
+--connection master
+--echo # Write events to ensure slave will be consistent with master
+create table t1 (a int);
+insert into t1 values (1);
+--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()`
+
+--echo # Flush logs on master forces slave to generate a Format description
+--echo # event in its relay log
+flush logs;
+
+--connection slave
+--echo # Ignore FDEs that happen before the CREATE/INSERT commands
+SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
+SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
+SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+
+--echo # On the next FDE, the slave should have the master CREATE/INSERT events
+SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
+select count(*)=1 from t1;
+
+--echo # The relay log FDE has been processed - here we check to ensure it was
+--echo # not considered in Seconds_Behind_Master calculation
+--connection slave1
+let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1);
+--let $t_now= `SELECT UNIX_TIMESTAMP()`
+
+if(`select $sbm > $t_now - $t_master_events_logged`)
+{
+ die "A relay log event was incorrectly used to set Seconds_Behind_Master";
+}
+
+--echo # Safely resume slave SQL thread
+SET @@global.debug_dbug='';
+SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
+SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+
+# Reset last sql_thread_continue signal
+SET DEBUG_SYNC='RESET';
+
+# Cleanup
+--connection master
+DROP TABLE t1;
+--save_master_pos
+--sync_slave_with_master
+
+--connection slave
+SET @@global.debug_dbug=$save_dbug;
+
+--source include/rpl_end.inc
diff --git a/sql/log.cc b/sql/log.cc
index c5a192d56d3..0f35d009a78 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -3571,6 +3571,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
: BINLOG_CHECKSUM_ALG_OFF;
s.checksum_alg= relay_log_checksum_alg;
+ s.set_relay_log_event();
}
else
s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
diff --git a/sql/slave.cc b/sql/slave.cc
index 68ddc611c03..74ac50aaae7 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -4385,6 +4385,15 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
}
rli->executed_entries++;
+ DBUG_EXECUTE_IF(
+ "pause_sql_thread_on_fde",
+ if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
+ DBUG_ASSERT(!debug_sync_set_action(
+ thd,
+ STRING_WITH_LEN(
+ "now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
+ });
+
#ifdef WITH_WSREP
wsrep_after_statement(thd);
#endif /* WITH_WSREP */