summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrandon Nesterenko <brandon.nesterenko@mariadb.com>2022-02-08 09:53:56 -0700
committerBrandon Nesterenko <brandon.nesterenko@mariadb.com>2022-02-15 08:00:28 -0700
commitcdf19cd618ed23fbf7051130b2a6b587c4a4316b (patch)
treeaceac722f8b7ced68dc4a639409dab04ee224b47
parentb557f26309ad1e47c08be4cdb436acbb9d5aa049 (diff)
downloadmariadb-git-cdf19cd618ed23fbf7051130b2a6b587c4a4316b.tar.gz
MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
The rpl.rpl_seconds_behind_master_spike test would sometimes timeout or take a very long time to complete. This happened because an MTR DEBUG_SYNC signal would be lost due to a subsequent call to RESET. I.e., the slave SQL thread would be paused due to the WAIT_FOR signal being lost, resulting in either a failed test if the `select master_pos_wait` timeout occurs first, or a very long run-time if the DBUG_SYNC timeout occurs first. The fix ensures that the MTR signal is processed by the slave SQL thread before issuing the call to RESET Reviewed By: ============ Andrei Elkin <andrei.elkin@mariadb.com>
-rw-r--r--mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result5
-rw-r--r--mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test18
2 files changed, 19 insertions, 4 deletions
diff --git a/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result
index be18f95c2c0..4eeb863bb40 100644
--- a/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result
+++ b/mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result
@@ -29,9 +29,10 @@ count(*)=1
# not considered in Seconds_Behind_Master calculation
connection slave1;
# Safely resume slave SQL thread
-SET @@global.debug_dbug='';
-SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
+# Prove SQL thread is in state "debug sync point: now"
+SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+# Wait for SQL thread to continue into normal execution
SET DEBUG_SYNC='RESET';
connection master;
DROP TABLE t1;
diff --git a/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
index 029625a09ad..31a9478f632 100644
--- a/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
+++ b/mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
@@ -71,10 +71,24 @@ if(`select $sbm > $t_now - $t_master_events_logged + 1`)
}
--echo # Safely resume slave SQL thread
-SET @@global.debug_dbug='';
-SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
+
+--let $dbug_wait_state="debug sync point: now"
+--echo # Prove SQL thread is in state $dbug_wait_state
+--let $wait_condition= SELECT STATE=$dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
+--source include/wait_condition.inc
+
+SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
+# We have to wait for the SQL thread to acknowledge the sql_thread_continue
+# signal. Otherwise the below RESET command can overwrite the signal before
+# the SQL thread is notified to proceed, causing it to "permanently" become
+# stuck awaiting the signal (until timeout is reached).
+
+--echo # Wait for SQL thread to continue into normal execution
+--let $wait_condition= SELECT STATE!= $dbug_wait_state from information_schema.PROCESSLIST where COMMAND="Slave_SQL"
+--source include/wait_condition.inc
+
# Reset last sql_thread_continue signal
SET DEBUG_SYNC='RESET';