diff options
author | Monty <monty@mariadb.org> | 2017-01-29 22:10:56 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2017-02-28 16:10:46 +0100 |
commit | e65f667bb60244610512efd7491fc77eccceb9db (patch) | |
tree | ff549849324d917615ab896afb7e25ee0bfc7396 /sql/rpl_parallel.cc | |
parent | d5c54f3990b49d7c7d6a410016e85e8e58803895 (diff) | |
download | mariadb-git-e65f667bb60244610512efd7491fc77eccceb9db.tar.gz |
MDEV-9573 'Stop slave' hangs on replication slave
The reason for this is that stop slave takes LOCK_active_mi over the
whole operation while some slave operations will also need LOCK_active_mi
which causes deadlocks.
Fixed by introducing object counting for Master_info and not taking
LOCK_active_mi over stop slave or even stop_all_slaves()
Another benefit of this approach is that it allows:
- Multiple threads can run SHOW SLAVE STATUS at the same time
- START/STOP/RESET/SLAVE STATUS on a slave will not block other slaves
- Simpler interface for handling get_master_info()
- Added some missing unlock of 'log_lock' in error condtions
- Moved rpl_parallel_inactivate_pool(&global_rpl_thread_pool) to end
of stop_slave() to not have to use LOCK_active_mi inside
terminate_slave_threads()
- Changed argument for remove_master_info() to Master_info, as we always
have this available
- Fixed core dump when doing FLUSH TABLES WITH READ LOCK and parallel
replication. Problem was that waiting for pause_for_ftwrl was not done
when deleting rpt->current_owner after a force_abort.
Diffstat (limited to 'sql/rpl_parallel.cc')
-rw-r--r-- | sql/rpl_parallel.cc | 62 |
1 files changed, 61 insertions, 1 deletions
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 23f61a82a90..ff2c7d68467 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -1312,6 +1312,29 @@ handle_rpl_parallel_thread(void *arg) } if (!in_event_group) { + /* If we are in a FLUSH TABLES FOR READ LOCK, wait for it */ + while (rpt->current_entry && rpt->pause_for_ftwrl) + { + /* + We are currently in the delicate process of pausing parallel + replication while FLUSH TABLES WITH READ LOCK is starting. We must + not de-allocate the thread (setting rpt->current_owner= NULL) until + rpl_unpause_after_ftwrl() has woken us up. + */ + rpl_parallel_entry *e= rpt->current_entry; + /* + Ensure that we will unblock rpl_pause_for_ftrwl() + e->pause_sub_id may be LONGLONG_MAX if rpt->current_entry has changed + */ + DBUG_ASSERT(e->pause_sub_id == (uint64)ULONGLONG_MAX || + e->last_committed_sub_id >= e->pause_sub_id); + mysql_mutex_lock(&e->LOCK_parallel_entry); + mysql_mutex_unlock(&rpt->LOCK_rpl_thread); + if (rpt->pause_for_ftwrl) + mysql_cond_wait(&e->COND_parallel_entry, &e->LOCK_parallel_entry); + mysql_mutex_unlock(&e->LOCK_parallel_entry); + mysql_mutex_lock(&rpt->LOCK_rpl_thread); + } rpt->current_owner= NULL; /* Tell wait_for_done() that we are done, if it is waiting. */ if (likely(rpt->current_entry) && @@ -1369,6 +1392,28 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, if ((res= pool_mark_busy(pool, current_thd))) return res; + /* Protect against parallel pool resizes */ + if (pool->count == new_count) + { + pool_mark_not_busy(pool); + return 0; + } + + /* + If we are about to delete pool, do an extra check that there are no new + slave threads running since we marked pool busy + */ + if (!new_count) + { + if (any_slave_sql_running()) + { + DBUG_PRINT("warning", + ("SQL threads running while trying to reset parallel pool")); + pool_mark_not_busy(pool); + return 0; // Ok to not resize pool + } + } + /* Allocate the new list of threads up-front. That way, if we fail half-way, we only need to free whatever we managed @@ -1382,7 +1427,7 @@ rpl_parallel_change_thread_count(rpl_parallel_thread_pool *pool, { my_error(ER_OUTOFMEMORY, MYF(0), (int(new_count*sizeof(*new_list) + new_count*sizeof(*rpt_array)))); - goto err;; + goto err; } for (i= 0; i < new_count; ++i) @@ -1503,6 +1548,20 @@ err: return 1; } +/* + Deactivate the parallel replication thread pool, if there are now no more + SQL threads running. +*/ + +int rpl_parallel_resize_pool_if_no_slaves(void) +{ + /* master_info_index is set to NULL on shutdown */ + if (opt_slave_parallel_threads > 0 && !any_slave_sql_running() && + master_info_index) + return rpl_parallel_inactivate_pool(&global_rpl_thread_pool); + return 0; +} + int rpl_parallel_activate_pool(rpl_parallel_thread_pool *pool) @@ -1814,6 +1873,7 @@ rpl_parallel_thread_pool::get_thread(rpl_parallel_thread **owner, { rpl_parallel_thread *rpt; + DBUG_ASSERT(count > 0); mysql_mutex_lock(&LOCK_rpl_thread_pool); while (unlikely(busy) || !(rpt= free_list)) mysql_cond_wait(&COND_rpl_thread_pool, &LOCK_rpl_thread_pool); |