From 3616640a3149b318e0d5602dd39f05e309514dbb Mon Sep 17 00:00:00 2001 From: Andrei Elkin Date: Fri, 17 Jan 2020 20:26:14 +0200 Subject: MDEV-20821 parallel slave server shutdown hang Parallel slave server shutdown found to be hanging in close_connections() triggered by shutdown due to a slave worker thread would not be notified to exit in case the worker was sitting idle. Fixed with destroying the worker pool earlier that is in slave_prepare_for_shutdown() when all their driver threads have already left. A test file is added to simulate the bug condition as well as check multi-sourced and not-idle worker cases. --- .../rpl/r/rpl_slave_shutdown_mdev20821.result | 79 ++++++++++ .../suite/rpl/t/rpl_slave_shutdown_mdev20821.cnf | 19 +++ .../suite/rpl/t/rpl_slave_shutdown_mdev20821.test | 165 +++++++++++++++++++++ sql/slave.cc | 3 + 4 files changed, 266 insertions(+) create mode 100644 mysql-test/suite/rpl/r/rpl_slave_shutdown_mdev20821.result create mode 100644 mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.cnf create mode 100644 mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.test diff --git a/mysql-test/suite/rpl/r/rpl_slave_shutdown_mdev20821.result b/mysql-test/suite/rpl/r/rpl_slave_shutdown_mdev20821.result new file mode 100644 index 00000000000..f90d2126103 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_slave_shutdown_mdev20821.result @@ -0,0 +1,79 @@ +include/rpl_init.inc [topology=1->3] +connection server_3; +set default_master_connection = ''; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +set default_master_connection = 'm2'; +change master to master_host='127.0.0.1', master_port=SERVER_MYPORT_2, master_user='root', master_use_gtid=slave_pos; +include/start_slave.inc +select @@global.slave_parallel_workers as two; +two +2 +connection server_3; +SHUTDOWN; +connection server_3; +connection server_3; +connection server_1; +create table t1 (i int primary key) engine=Innodb; +connection server_2; +create table t2 (i int primary 
key) engine=Innodb; +connection server_3; +set default_master_connection = ''; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +set default_master_connection = 'm2'; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +connection server_2; +insert into t2 values (1); +connection server_3; +connection server_1; +insert into t1 values (1); +connection server_3; +connection server_3; +SHUTDOWN; +connection server_3; +connection server_3; +connection server_3; +set default_master_connection = ''; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +set default_master_connection = 'm2'; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +connect conn_block_server3, 127.0.0.1, root,, test, $SERVER_MYPORT_3,; +begin; +insert into t1 values (2); +insert into t2 values (2); +connection server_1; +insert into t1 values (2); +connection server_2; +insert into t2 values (2); +connection server_3; +SHUTDOWN; +connection server_3; +connection server_3; +connection server_3; +set default_master_connection = ''; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +set default_master_connection = 'm2'; +include/start_slave.inc +Warnings: +Note 1254 Slave is already running +connection server_1; +drop table t1; +connection server_2; +drop table t2; +connection server_3; +set default_master_connection = 'm2'; +include/stop_slave.inc +RESET SLAVE ALL; +set default_master_connection = ''; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.cnf b/mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.cnf new file mode 100644 index 00000000000..1e7cdee510b --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.cnf @@ -0,0 +1,19 @@ +!include suite/rpl/rpl_1slave_base.cnf +!include include/default_client.cnf + +[mysqld.1] +log-slave-updates +gtid-domain-id=1 + +[mysqld.2] +log-slave-updates +gtid-domain-id=2 + +[mysqld.3] 
+log-slave-updates +gtid-domain-id=3 +slave_parallel_threads=2 + +[ENV] +SERVER_MYPORT_3= @mysqld.3.port +SERVER_MYSOCK_3= @mysqld.3.socket diff --git a/mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.test b/mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.test new file mode 100644 index 00000000000..563533bb104 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_slave_shutdown_mdev20821.test @@ -0,0 +1,165 @@ +# MDEV-20821 parallel slave server shutdown hang +# +# Test the bug condition of a parallel slave server shutdown +# hang when the parallel workers were idle. +# The bug reported scenario is extended to cover the multi-sources case as well as +# checking is done for both the idle and busy workers cases. + +--source include/have_innodb.inc +--source include/have_binlog_format_mixed.inc +--let $rpl_topology= 1->3 +--source include/rpl_init.inc + +# +# A. idle workers. +# +--connection server_3 +set default_master_connection = ''; +--source include/start_slave.inc + +set default_master_connection = 'm2'; +--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2 +eval change master to master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=slave_pos; +--source include/start_slave.inc + +select @@global.slave_parallel_workers as two; + +# At this point worker threads have no assignment. +# Shutdown must not hang. + +--connection server_3 +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +wait +EOF +--send SHUTDOWN +--reap +--source include/wait_until_disconnected.inc + +--connection server_3 +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +restart +EOF + +# No hang is *proved* to occur when this point is reached. +--connection server_3 +--enable_reconnect +--source include/wait_until_connected_again.inc + +# +# B.
resting workers after some busy time +# +--connection server_1 +create table t1 (i int primary key) engine=Innodb; + +--connection server_2 +create table t2 (i int primary key) engine=Innodb; + +--connection server_3 +set default_master_connection = ''; +--source include/start_slave.inc + +set default_master_connection = 'm2'; +--source include/start_slave.inc + +--connection server_2 +insert into t2 values (1); +--save_master_pos + +--connection server_3 +--sync_with_master 0,'m2' + +--connection server_1 +insert into t1 values (1); +--save_master_pos + +--connection server_3 +--sync_with_master 0,'' + +# At this point worker threads have no assignment. +# Shutdown must not hang. + +--connection server_3 +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +wait +EOF +--send SHUTDOWN +--reap +--source include/wait_until_disconnected.inc + +--connection server_3 +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +restart +EOF + +# No hang is *proved* to occur when this point is reached. +--connection server_3 +--enable_reconnect +--source include/wait_until_connected_again.inc + +# +# C. busy workers +# +--connection server_3 +set default_master_connection = ''; +--source include/start_slave.inc + +set default_master_connection = 'm2'; +--source include/start_slave.inc + +--connect (conn_block_server3, 127.0.0.1, root,, test, $SERVER_MYPORT_3,) +begin; + insert into t1 values (2); + insert into t2 values (2); + +--connection server_1 +insert into t1 values (2); +--connection server_2 +insert into t2 values (2); + + +# At this point there's a good chance the worker threads are busy. +# SHUTDOWN must proceed without any delay as above. +--connection server_3 +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +wait +EOF +--send SHUTDOWN +--reap +--source include/wait_until_disconnected.inc + +--connection server_3 +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.3.expect +restart +EOF + +# No hang is *proved* to occur when this point is reached.
+--connection server_3 +--enable_reconnect +--source include/wait_until_connected_again.inc + + +# Cleanup + +--connection server_3 +set default_master_connection = ''; +--source include/start_slave.inc + +set default_master_connection = 'm2'; +--source include/start_slave.inc + +--connection server_1 +drop table t1; + +--connection server_2 +drop table t2; +--save_master_pos + +# (!) The following block is critical to avoid check-mysqld_3.reject by mtr: +--connection server_3 +--sync_with_master 0,'m2' +set default_master_connection = 'm2'; +--source include/stop_slave.inc +RESET SLAVE ALL; +set default_master_connection = ''; + +--source include/rpl_end.inc diff --git a/sql/slave.cc b/sql/slave.cc index 3124b2d10ab..8e26301d926 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -975,6 +975,9 @@ void slave_prepare_for_shutdown() mysql_mutex_lock(&LOCK_active_mi); master_info_index->free_connections(); mysql_mutex_unlock(&LOCK_active_mi); + // It's safe to destruct worker pool now when + // all driver threads are gone. + global_rpl_thread_pool.destroy(); } /* -- cgit v1.2.1