diff options
author | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2022-01-05 08:35:05 -0700 |
---|---|---|
committer | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2022-01-05 08:40:20 -0700 |
commit | ddd79e29059dbb065c9f817382e4a0378171ba1d (patch) | |
tree | bf8045d13d634a1ef9266fe0ef01bbf83d0d9fb0 | |
parent | a48d2ec866751e9da76066bf3a30f99da9031ab0 (diff) | |
download | mariadb-git-10.4-MDEV-16091-merge.tar.gz |
MDEV-16091: Seconds_Behind_Master spikes to millions of seconds10.4-MDEV-16091-merge
Problem:
========
A slave’s relay log format description event is used when
calculating Seconds_Behind_Master (SBM). This forces the SBM
value to spike when processing these events, as their creation
date is set to the timestamp that the IO thread begins.
Solution:
========
When the slave generates a format description event, mark the
event as a relay log event so it does not update the
rli->last_master_timestamp variable.
Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result | 41 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test | 86 | ||||
-rw-r--r-- | sql/log.cc | 1 | ||||
-rw-r--r-- | sql/slave.cc | 9 |
4 files changed, 137 insertions, 0 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result new file mode 100644 index 00000000000..d164ea5434f --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result @@ -0,0 +1,41 @@ +include/master-slave.inc +[connection master] +connection slave; +include/stop_slave.inc +SET @save_dbug= @@GLOBAL.debug_dbug; +SET @@global.debug_dbug="+d,pause_sql_thread_on_fde"; +include/start_slave.inc +# Future events must be logged at least 2 seconds after +# the slave starts +connection master; +# Write events to ensure slave will be consistent with master +create table t1 (a int); +insert into t1 values (1); +# Flush logs on master forces slave to generate a Format description +# event in its relay log +flush logs; +connection slave; +# Ignore FDEs that happen before the CREATE/INSERT commands +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +# On the next FDE, the slave should have the master CREATE/INSERT events +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +select count(*)=1 from t1; +count(*)=1 +1 +# The relay log FDE has been processed - here we check to ensure it was +# not considered in Seconds_Behind_Master calculation +connection slave1; +# Safely resume slave SQL thread +SET @@global.debug_dbug=''; +SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +SET DEBUG_SYNC='RESET'; +connection master; +DROP TABLE t1; +connection slave; +connection slave; +SET @@global.debug_dbug=$save_dbug; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test new file mode 100644 index 00000000000..36ddbd0dc9f --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test @@ -0,0 +1,86 @@ +# +# Purpose: +# This test validates that a slave's relay log format description event is +# not used to calculate the Seconds_Behind_Master time displayed by +# SHOW SLAVE STATUS. +# +# Methodology: +# Ensure that a slave's reported Seconds_Behind_Master does not point before +# a time in which we can prove that it has progressed beyond. The slave's +# relay log events are created using the timestamp at which the IO thread was +# created. Therefore, after starting the slave's IO thread, we sleep so any +# proceeding events are forced to have later timestamps. After sleeping, we run +# MDL statements on the master and save the time at which they are binlogged. +# Once the slave executes these MDL commands, we have proven that the slave has +# caught up to this saved timestamp. At this point, if the value of +# Seconds_Behind_Master points before the time in which the MDL events were +# logged, it is invalid. +# +# References: +# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds +# +--source include/have_debug.inc +--source include/have_innodb.inc +--source include/master-slave.inc + +--connection slave +--source include/stop_slave.inc +SET @save_dbug= @@GLOBAL.debug_dbug; +SET @@global.debug_dbug="+d,pause_sql_thread_on_fde"; +--source include/start_slave.inc + +--let $sleep_time=2 +--echo # Future events must be logged at least $sleep_time seconds after +--echo # the slave starts +--sleep $sleep_time + +--connection master +--echo # Write events to ensure slave will be consistent with master +create table t1 (a int); +insert into t1 values (1); +--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()` + +--echo # Flush logs on master forces slave to generate a Format description +--echo # event in its relay log +flush logs; + +--connection slave +--echo # Ignore FDEs that happen before the CREATE/INSERT commands +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; + +--echo # On the next FDE, the slave should have the master CREATE/INSERT events +SET DEBUG_SYNC='now WAIT_FOR paused_on_fde'; +select count(*)=1 from t1; + +--echo # The relay log FDE has been processed - here we check to ensure it was +--echo # not considered in Seconds_Behind_Master calculation +--connection slave1 +let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1); +--let $t_now= `SELECT UNIX_TIMESTAMP()` + +if(`select $sbm > $t_now - $t_master_events_logged`) +{ + die "A relay log event was incorrectly used to set Seconds_Behind_Master"; +} + +--echo # Safely resume slave SQL thread +SET @@global.debug_dbug=''; +SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; +SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; + +# Reset last sql_thread_continue signal +SET DEBUG_SYNC='RESET'; + +# Cleanup +--connection master +DROP TABLE t1; +--save_master_pos +--sync_slave_with_master + +--connection slave +SET @@global.debug_dbug=$save_dbug; + +--source include/rpl_end.inc diff --git a/sql/log.cc b/sql/log.cc index c5a192d56d3..0f35d009a78 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3571,6 +3571,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name, opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options : BINLOG_CHECKSUM_ALG_OFF; s.checksum_alg= relay_log_checksum_alg; + s.set_relay_log_event(); } else s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options; diff --git a/sql/slave.cc b/sql/slave.cc index 68ddc611c03..74ac50aaae7 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -4385,6 +4385,15 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli, } rli->executed_entries++; + DBUG_EXECUTE_IF( + "pause_sql_thread_on_fde", + if (ev && typ == FORMAT_DESCRIPTION_EVENT) { + DBUG_ASSERT(!debug_sync_set_action( + thd, + STRING_WITH_LEN( + "now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue"))); + }); + #ifdef WITH_WSREP wsrep_after_statement(thd); #endif /* WITH_WSREP */ |