From 93f5d40656e3faa6cac9fe66934f6d8f3aa15d98 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 17 Jun 2021 20:15:24 +0300 Subject: Fixed debug_sync timeout in deadlock_drop_table The issue was that we sent two different signals to different threads after each other. The DEBUG_SYNC functionality cannot handle this (as the signal is stored in a global variable) and the first one can get lost. Fixed by using the same signal for both threads. --- .../suite/mariabackup/deadlock_drop_table.result | 33 ------------------ .../suite/mariabackup/deadlock_drop_table.test | 35 ------------------- .../suite/stress/r/deadlock_drop_table.result | 32 ++++++++++++++++++ mysql-test/suite/stress/t/deadlock_drop_table.test | 39 ++++++++++++++++++++++ sql/backup.cc | 12 +++++-- sql/debug_sync.cc | 9 +++++ sql/mysqld.cc | 2 ++ sql/mysqld.h | 2 ++ 8 files changed, 94 insertions(+), 70 deletions(-) delete mode 100644 mysql-test/suite/mariabackup/deadlock_drop_table.result delete mode 100644 mysql-test/suite/mariabackup/deadlock_drop_table.test create mode 100644 mysql-test/suite/stress/r/deadlock_drop_table.result create mode 100644 mysql-test/suite/stress/t/deadlock_drop_table.test diff --git a/mysql-test/suite/mariabackup/deadlock_drop_table.result b/mysql-test/suite/mariabackup/deadlock_drop_table.result deleted file mode 100644 index 7e549157c81..00000000000 --- a/mysql-test/suite/mariabackup/deadlock_drop_table.result +++ /dev/null @@ -1,33 +0,0 @@ -create or replace table t1 (a int primary key, b int, c int, key(b),key(c)) engine=myisam; -insert into t1 (a) values(1); -set debug_sync='RESET'; -connect con1, localhost, root,,; -connect con2, localhost, root,,; -connection default; -backup stage start; -backup stage flush; -select * from t1; -a b c -1 NULL NULL -set debug_sync='after_purge_tables SIGNAL parked WAIT_FOR go'; -set debug_sync='before_tc_release_table SIGNAL parked2 WAIT_FOR go2'; -backup stage BLOCK_DDL; -connection con1; -set debug_sync='now WAIT_FOR parked'; -select * from t1; -a b c -1 NULL NULL -set debug_sync='now SIGNAL go'; -set debug_sync='now WAIT_FOR parked2'; -set debug_sync='before_wait_for_refs SIGNAL waiting WAIT_FOR go3'; -drop table t1;; -connection con2; -set debug_sync='now WAIT_FOR waiting'; -set debug_sync='now SIGNAL go2'; -set debug_sync='now SIGNAL go3'; -connection default; -connection con1; -connection default; -disconnect con1; -disconnect con2; -set debug_sync='RESET'; diff --git a/mysql-test/suite/mariabackup/deadlock_drop_table.test b/mysql-test/suite/mariabackup/deadlock_drop_table.test deleted file mode 100644 index 81350726b76..00000000000 --- a/mysql-test/suite/mariabackup/deadlock_drop_table.test +++ /dev/null @@ -1,35 +0,0 @@ ---source include/have_debug.inc - -create or replace table t1 (a int primary key, b int, c int, key(b),key(c)) engine=myisam; -insert into t1 (a) values(1); - -set debug_sync='RESET'; -connect (con1, localhost, root,,); -connect (con2, localhost, root,,); -connection default; - -backup stage start; -backup stage flush; -select * from t1; -set debug_sync='after_purge_tables SIGNAL parked WAIT_FOR go'; -set debug_sync='before_tc_release_table SIGNAL parked2 WAIT_FOR go2'; ---send backup stage BLOCK_DDL ---connection con1 -set debug_sync='now WAIT_FOR parked'; -select * from t1; -set debug_sync='now SIGNAL go'; -set debug_sync='now WAIT_FOR parked2'; -set debug_sync='before_wait_for_refs SIGNAL waiting WAIT_FOR go3'; ---send drop table t1; ---connection con2 -set debug_sync='now WAIT_FOR waiting'; -set debug_sync='now SIGNAL go2'; -set debug_sync='now SIGNAL go3'; ---connection default ---reap ---connection con1 ---reap -connection default; -disconnect con1; -disconnect con2; -set debug_sync='RESET'; diff --git a/mysql-test/suite/stress/r/deadlock_drop_table.result b/mysql-test/suite/stress/r/deadlock_drop_table.result new file mode 100644 index 00000000000..326f694e8d9 --- /dev/null +++ b/mysql-test/suite/stress/r/deadlock_drop_table.result @@ -0,0 +1,32 @@ +create or replace table t1 (a int primary key, b int, c int, key(b),key(c)) engine=myisam; +insert into t1 (a) values(1); +set debug_sync='RESET'; +connect con1, localhost, root,,; +connect con2, localhost, root,,; +connection default; +backup stage start; +backup stage flush; +select * from t1; +a b c +1 NULL NULL +set debug_sync='after_purge_tables SIGNAL parked WAIT_FOR go'; +set debug_sync='before_tc_release_table SIGNAL parked2 WAIT_FOR go2'; +backup stage BLOCK_DDL; +connection con1; +set debug_sync='now WAIT_FOR parked'; +select * from t1; +a b c +1 NULL NULL +set debug_sync='now SIGNAL go'; +set debug_sync='now WAIT_FOR parked2'; +set debug_sync='before_wait_for_refs SIGNAL waiting WAIT_FOR go2'; +drop table t1;; +connection con2; +set debug_sync='now WAIT_FOR waiting'; +set debug_sync='now SIGNAL go2'; +connection default; +connection con1; +connection default; +disconnect con1; +disconnect con2; +set debug_sync='RESET'; diff --git a/mysql-test/suite/stress/t/deadlock_drop_table.test b/mysql-test/suite/stress/t/deadlock_drop_table.test new file mode 100644 index 00000000000..b49ca0b9fb7 --- /dev/null +++ b/mysql-test/suite/stress/t/deadlock_drop_table.test @@ -0,0 +1,39 @@ +--source include/have_debug.inc + +create or replace table t1 (a int primary key, b int, c int, key(b),key(c)) engine=myisam; +insert into t1 (a) values(1); + +set debug_sync='RESET'; +connect (con1, localhost, root,,); +connect (con2, localhost, root,,); +connection default; + +backup stage start; +backup stage flush; +select * from t1; +set debug_sync='after_purge_tables SIGNAL parked WAIT_FOR go'; +set debug_sync='before_tc_release_table SIGNAL parked2 WAIT_FOR go2'; +--send backup stage BLOCK_DDL +--connection con1 +set debug_sync='now WAIT_FOR parked'; +select * from t1; +set debug_sync='now SIGNAL go'; +set debug_sync='now WAIT_FOR parked2'; +set debug_sync='before_wait_for_refs SIGNAL waiting WAIT_FOR go2'; +--send drop table t1; +--connection con2 +set debug_sync='now WAIT_FOR waiting'; +set debug_sync='now SIGNAL go2'; + +# Write out show processlist if the debug sync point times out +let $wait_condition= select count(*)=0 from information_schema.processlist where state like "%debug%"; +source include/wait_condition.inc; + +--connection default +--reap +--connection con1 +--reap +connection default; +disconnect con1; +disconnect con2; +set debug_sync='RESET'; diff --git a/sql/backup.cc b/sql/backup.cc index 1b3007c5a00..5367a75b094 100644 --- a/sql/backup.cc +++ b/sql/backup.cc @@ -258,16 +258,19 @@ static bool backup_flush(THD *thd) static bool backup_block_ddl(THD *thd) { + PSI_stage_info org_stage; DBUG_ENTER("backup_block_ddl"); kill_delayed_threads(); mysql_ha_cleanup_no_free(thd); + thd->backup_stage(&org_stage); + THD_STAGE_INFO(thd, stage_waiting_for_flush); /* Wait until all non trans statements has ended */ if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket, MDL_BACKUP_WAIT_FLUSH, thd->variables.lock_wait_timeout)) - DBUG_RETURN(1); + goto err; /* Remove not used tables from the table share. Flush all changes to @@ -284,6 +287,7 @@ static bool backup_block_ddl(THD *thd) We didn't do this lock above, as we wanted DDL's to be executed while we wait for non transactional tables (which may take a while). */ + THD_STAGE_INFO(thd, stage_waiting_for_ddl); if (thd->mdl_context.upgrade_shared_lock(backup_flush_ticket, MDL_BACKUP_WAIT_DDL, thd->variables.lock_wait_timeout)) @@ -293,12 +297,16 @@ static bool backup_block_ddl(THD *thd) was called so that this function can be called again */ backup_flush_ticket->downgrade_lock(MDL_BACKUP_FLUSH); - DBUG_RETURN(1); + goto err; } /* There can't be anything more that needs to be logged to ddl log */ + THD_STAGE_INFO(thd, org_stage); stop_ddl_logging(); DBUG_RETURN(0); +err: + THD_STAGE_INFO(thd, org_stage); + DBUG_RETURN(1); } diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc index 1fbb15592a4..debda4c8970 100644 --- a/sql/debug_sync.cc +++ b/sql/debug_sync.cc @@ -995,6 +995,15 @@ static char *debug_sync_number(ulong *number_p, char *actstrptr, The input string needs to be ASCII NUL ('\0') terminated. We split nul-terminated tokens in it without copy. + @note + The current implementation does not support two 'now SIGNAL xxx' commands + in a row for multiple threads as the first one can get lost while + the waiting threads are sleeping on mysql_cond_timedwait(). + One reason for this is that the signal name is stored in a global variable + that is overwritten. A better way would be to store all signals in + an array together with a 'time' when the signal was sent. This array + should be checked on broadcast. + @see the function comment of debug_sync_token() for more constraints for the string. */ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 249dde9177c..698bcfff0f7 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -9146,6 +9146,8 @@ PSI_stage_info stage_slave_background_process_request= { 0, "Processing requests PSI_stage_info stage_slave_background_wait_request= { 0, "Waiting for requests", 0}; PSI_stage_info stage_waiting_for_deadlock_kill= { 0, "Waiting for parallel replication deadlock handling to complete", 0}; PSI_stage_info stage_starting= { 0, "starting", 0}; +PSI_stage_info stage_waiting_for_flush= { 0, "Waiting for non trans tables to be flushed", 0}; +PSI_stage_info stage_waiting_for_ddl= { 0, "Waiting for DDLs", 0}; PSI_memory_key key_memory_DATE_TIME_FORMAT; PSI_memory_key key_memory_DDL_LOG_MEMORY_ENTRY; diff --git a/sql/mysqld.h b/sql/mysqld.h index 5356fd967c7..a74c6ce8bda 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -639,7 +639,9 @@ extern PSI_stage_info stage_upgrading_lock; extern PSI_stage_info stage_user_lock; extern PSI_stage_info stage_user_sleep; extern PSI_stage_info stage_verifying_table; +extern PSI_stage_info stage_waiting_for_ddl; extern PSI_stage_info stage_waiting_for_delay_list; +extern PSI_stage_info stage_waiting_for_flush; extern PSI_stage_info stage_waiting_for_gtid_to_be_written_to_binary_log; extern PSI_stage_info stage_waiting_for_handler_insert; extern PSI_stage_info stage_waiting_for_handler_lock; -- cgit v1.2.1