diff options
author | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2022-02-08 09:53:56 -0700 |
---|---|---|
committer | Brandon Nesterenko <brandon.nesterenko@mariadb.com> | 2022-02-15 08:00:28 -0700 |
commit | cdf19cd618ed23fbf7051130b2a6b587c4a4316b (patch) | |
tree | aceac722f8b7ced68dc4a639409dab04ee224b47 | |
parent | b557f26309ad1e47c08be4cdb436acbb9d5aa049 (diff) | |
download | mariadb-git-cdf19cd618ed23fbf7051130b2a6b587c4a4316b.tar.gz |
MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
The rpl.rpl_seconds_behind_master_spike test would sometimes
timeout or take a very long time to complete. This happened
because an MTR DEBUG_SYNC signal would be lost due to a
subsequent call to RESET. I.e., the slave SQL thread would
be paused due to the WAIT_FOR signal being lost, resulting in
either a failed test if the `select master_pos_wait` timeout
occurs first, or a very long run-time if the DBUG_SYNC timeout
occurs first.
The fix ensures that the MTR signal is processed by the slave
SQL thread before issuing the call to RESET
Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r-- | mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result | 5 | ||||
-rw-r--r-- | mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test | 18 |
2 files changed, 19 insertions, 4 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result index be18f95c2c0..4eeb863bb40 100644 --- a/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result +++ b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result @@ -29,9 +29,10 @@ count(*)=1 # not considered in Seconds_Behind_Master calculation connection slave1; # Safely resume slave SQL thread -SET @@global.debug_dbug=''; -SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; +# Prove SQL thread is in state "debug sync point: now" +SET @@global.debug_dbug="-d,pause_sql_thread_on_fde"; SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +# Wait for SQL thread to continue into normal execution SET DEBUG_SYNC='RESET'; connection master; DROP TABLE t1; diff --git a/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test index 029625a09ad..31a9478f632 100644 --- a/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test +++ b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test @@ -71,10 +71,24 @@ if(`select $sbm > $t_now - $t_master_events_logged + 1`) } --echo # Safely resume slave SQL thread -SET @@global.debug_dbug=''; -SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR'; + +--let $dbug_wait_state="debug sync point: now" +--echo # Prove SQL thread is in state $dbug_wait_state +--let $wait_condition= SELECT STATE=$dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL" +--source include/wait_condition.inc + +SET @@global.debug_dbug="-d,pause_sql_thread_on_fde"; SET DEBUG_SYNC='now SIGNAL sql_thread_continue'; +# We have to wait for the SQL thread to acknowledge the sql_thread_continue +# signal. Otherwise the below RESET command can overwrite the signal before +# the SQL thread is notified to proceed, causing it to "permanently" become +# stuck awaiting the signal (until timeout is reached). + +--echo # Wait for SQL thread to continue into normal execution +--let $wait_condition= SELECT STATE!= $dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL" +--source include/wait_condition.inc + # Reset last sql_thread_continue signal SET DEBUG_SYNC='RESET'; |