From 66f4900b517681da2aed3b562158ef58679961e4 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 11 Jan 2021 13:16:38 +0100 Subject: Revert "MDEV-23536 : Race condition between KILL and transaction commit" This reverts the server part of the commit 775fccea0 but keeps InnoDB part (which reverted MDEV-17092 5530a93f4). So after this both MDEV-23536 and MDEV-17092 are reverted, and the original bug is resurrected. --- sql/sql_class.cc | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) (limited to 'sql/sql_class.cc') diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 2321991d99f..2595717572a 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1357,15 +1357,12 @@ void THD::change_user(void) /* Do operations that may take a long time */ -void THD::cleanup(bool have_mutex) +void THD::cleanup(void) { DBUG_ENTER("THD::cleanup"); DBUG_ASSERT(cleanup_done == 0); - if (have_mutex) - set_killed_no_mutex(KILL_CONNECTION,0,0); - else - set_killed(KILL_CONNECTION); + set_killed(KILL_CONNECTION); #ifdef ENABLE_WHEN_BINLOG_WILL_BE_ABLE_TO_PREPARE if (transaction.xid_state.xa_state == XA_PREPARED) { @@ -1440,28 +1437,6 @@ void THD::cleanup(bool have_mutex) void THD::free_connection() { DBUG_ASSERT(free_connection_done == 0); - /* Check that we have already called thd->unlink() */ - DBUG_ASSERT(prev == 0 && next == 0); - - /* - Other threads may have a lock on THD::LOCK_thd_data or - THD::LOCK_thd_kill to ensure that this THD is not deleted - while they access it. The following mutex_lock ensures - that no one else is using this THD and it's now safe to - continue. - - For example consider KILL-statement execution on - sql_parse.cc kill_one_thread() that will use - THD::LOCK_thd_data to protect victim thread during - THD::awake(). - */ - mysql_mutex_lock(&LOCK_thd_data); - mysql_mutex_lock(&LOCK_thd_kill); - -#ifdef WITH_WSREP - delete wsrep_rgi; - wsrep_rgi= 0; -#endif /* WITH_WSREP */ my_free(db); db= NULL; #ifndef EMBEDDED_LIBRARY @@ -1470,8 +1445,8 @@ void THD::free_connection() net.vio= 0; net_end(&net); #endif - if (!cleanup_done) - cleanup(true); // We have locked THD::LOCK_thd_kill + if (!cleanup_done) + cleanup(); ha_close_connection(this); plugin_thdvar_cleanup(this); mysql_audit_free_thd(this); @@ -1482,8 +1457,6 @@ void THD::free_connection() #if defined(ENABLED_PROFILING) profiling.restart(); // Reset profiling #endif - mysql_mutex_unlock(&LOCK_thd_kill); - mysql_mutex_unlock(&LOCK_thd_data); } /* @@ -1539,6 +1512,9 @@ THD::~THD() mysql_mutex_lock(&LOCK_thd_data); mysql_mutex_unlock(&LOCK_thd_data); +#ifdef WITH_WSREP + delete wsrep_rgi; +#endif if (!free_connection_done) free_connection(); -- cgit v1.2.1 From 9b750dcbd89ecf455211a77348a85464b282abee Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 11 Jan 2021 13:21:42 +0100 Subject: MDEV-23536 Race condition between KILL and transaction commit Server part: kill_handlerton() was accessing thd->ha_data[] for some other thd, while it could be concurrently modified by its owner thd. protect thd->ha_data[] modifications with a mutex. require this mutex when accessing thd->ha_data[] from kill_handlerton. InnoDB part: on close_connection, detach trx from thd before freeing the trx --- sql/sql_class.cc | 3 +++ 1 file changed, 3 insertions(+) (limited to 'sql/sql_class.cc') diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 2595717572a..c3274ae9b82 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -444,6 +444,7 @@ void thd_set_ha_data(THD *thd, const struct handlerton *hton, const void *ha_data) { plugin_ref *lock= &thd->ha_data[hton->slot].lock; + DBUG_ASSERT(thd == current_thd); if (ha_data && !*lock) *lock= ha_lock_engine(NULL, (handlerton*) hton); else if (!ha_data && *lock) @@ -451,7 +452,9 @@ void thd_set_ha_data(THD *thd, const struct handlerton *hton, plugin_unlock(NULL, *lock); *lock= NULL; } + mysql_mutex_lock(&thd->LOCK_thd_data); *thd_ha_data(thd, hton)= (void*) ha_data; + mysql_mutex_unlock(&thd->LOCK_thd_data); } -- cgit v1.2.1 From beaea31ab12ab56ea8a6eb5e99cf82648675ea78 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Wed, 9 Dec 2020 21:53:18 +0200 Subject: MDEV-23851 BF-BF Conflict issue because of UK GAP locks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some DML operations on tables having unique secondary keys cause scanning in the secondary index, for instance to find potential unique key violations in the seconday index. This scanning may involve GAP locking in the index. As this locking happens also when applying replication events in high priority applier threads, there is a probabality for lock conflicts between two wsrep high priority threads. This PR avoids lock conflicts of high priority wsrep threads, which do secondary index scanning e.g. for duplicate key detection. The actual fix is the patch in sql_class.cc:thd_need_ordering_with(), where we allow relaxed GAP locking protocol between wsrep high priority threads. wsrep high priority threads (replication appliers, replayers and TOI processors) are ordered by the replication provider, and they will not need serializability support gained by secondary index GAP locks. PR contains also a mtr test, which exercises a scenario where two replication applier threads have a false positive conflict in GAP of unique secondary index. The conflicting local committing transaction has to replay, and the test verifies also that the replaying phase will not conflict with the latter repllication applier. Commit also contains new test scenario for galera.galera_UK_conflict.test, where replayer starts applying after a slave applier thread, with later seqno, has advanced to commit phase. The applier and replayer have false positive GAP lock conflict on secondary unique index, and replayer should ignore this. This test scenario caused crash with earlier version in this PR, and to fix this, the secondary index uniquenes checking has been relaxed even further. Now innodb trx_t structure has new member: bool wsrep_UK_scan, which is set to true, when high priority thread is performing unique secondary index scanning. The member trx_t::wsrep_UK_scan is defined inside WITH_WSREP directive, to make it possible to prepare a MariaDB build where this additional trx_t member is not present and is not used in the code base. trx->wsrep_UK_scan is set to true only for the duration of function call for: lock_rec_lock() trx->wsrep_UK_scan is used only in lock_rec_has_to_wait() function to relax the need to wait if wsrep_UK_scan is set and conflicting transaction is also high priority. Reviewed-by: Jan Lindström --- sql/sql_class.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'sql/sql_class.cc') diff --git a/sql/sql_class.cc b/sql/sql_class.cc index c3274ae9b82..92736eacee2 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1,6 +1,6 @@ /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. - Copyright (c) 2008, 2020, MariaDB Corporation. + Copyright (c) 2008, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4730,6 +4730,16 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) DBUG_EXECUTE_IF("disable_thd_need_ordering_with", return 1;); if (!thd || !other_thd) return 1; +#ifdef WITH_WSREP + /* wsrep applier, replayer and TOI processing threads are ordered + by replication provider, relaxed GAP locking protocol can be used + between high priority wsrep threads + */ + if (WSREP_ON && + wsrep_thd_is_BF(const_cast(thd), false) && + wsrep_thd_is_BF(const_cast(other_thd), true)) + return 0; +#endif /* WITH_WSREP */ rgi= thd->rgi_slave; other_rgi= other_thd->rgi_slave; if (!rgi || !other_rgi) -- cgit v1.2.1 From 259a1902a066d01547e5d70ba0e4837d1be62e7b Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 5 Feb 2021 15:00:38 +0100 Subject: cleanup: THD::abort_current_cond_wait() * reuse the loop in THD::abort_current_cond_wait, don't duplicate it * find_thread_by_id should return whatever it has found, it's the caller's task not to kill COM_DAEMON (if the caller's a killer) and other minor changes --- sql/sql_class.cc | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'sql/sql_class.cc') diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 5c1ad49c9de..d815dd56647 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -49,9 +49,6 @@ #include #include #include -#ifdef __WIN__0 -#include -#endif #include #include @@ -70,6 +67,8 @@ #ifdef WITH_WSREP #include "wsrep_thd.h" #include "wsrep_trans_observer.h" +#else +static inline bool wsrep_is_bf_aborted(THD* thd) { return false; } #endif /* WITH_WSREP */ #include "opt_trace.h" @@ -1902,15 +1901,21 @@ void THD::awake_no_mutex(killed_state state_to_set) } /* Interrupt target waiting inside a storage engine. */ - if (IF_WSREP(state_to_set != NOT_KILLED && !wsrep_is_bf_aborted(this), - state_to_set != NOT_KILLED)) + if (state_to_set != NOT_KILLED && !wsrep_is_bf_aborted(this)) ha_kill_query(this, thd_kill_level(this)); - /* Broadcast a condition to kick the target if it is waiting on it. */ + abort_current_cond_wait(false); + DBUG_VOID_RETURN; +} + +/* Broadcast a condition to kick the target if it is waiting on it. */ +void THD::abort_current_cond_wait(bool force) +{ + mysql_mutex_assert_owner(&LOCK_thd_kill); if (mysys_var) { mysql_mutex_lock(&mysys_var->mutex); - if (!system_thread) // Don't abort locks + if (!system_thread || force) // Don't abort locks mysys_var->abort=1; /* @@ -1968,7 +1973,6 @@ void THD::awake_no_mutex(killed_state state_to_set) } mysql_mutex_unlock(&mysys_var->mutex); } - DBUG_VOID_RETURN; } @@ -2022,16 +2026,7 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use, mysql_mutex_lock(&in_use->LOCK_thd_kill); if (in_use->killed < KILL_CONNECTION) in_use->set_killed_no_mutex(KILL_CONNECTION); - if (in_use->mysys_var) - { - mysql_mutex_lock(&in_use->mysys_var->mutex); - if (in_use->mysys_var->current_cond) - mysql_cond_broadcast(in_use->mysys_var->current_cond); - - /* Abort if about to wait in thr_upgrade_write_delay_lock */ - in_use->mysys_var->abort= 1; - mysql_mutex_unlock(&in_use->mysys_var->mutex); - } + in_use->abort_current_cond_wait(true); mysql_mutex_unlock(&in_use->LOCK_thd_kill); signalled= TRUE; } -- cgit v1.2.1 From 9703cffa8cb57e2fe29719f4aae3282bfae82878 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 5 Feb 2021 14:59:27 +0100 Subject: don't take mutexes conditionally --- sql/sql_class.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sql/sql_class.cc') diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d815dd56647..dc3903661e1 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1868,7 +1868,7 @@ void THD::awake_no_mutex(killed_state state_to_set) DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d", this, current_thd, (int) state_to_set)); THD_CHECK_SENTRY(this); - if (WSREP_NNULL(this)) mysql_mutex_assert_owner(&LOCK_thd_data); + mysql_mutex_assert_owner(&LOCK_thd_data); mysql_mutex_assert_owner(&LOCK_thd_kill); print_aborted_warning(3, "KILLED"); -- cgit v1.2.1 From eac8341df4c3c7b98360f4e9498acf393dc055e3 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 7 Feb 2021 17:48:58 +0100 Subject: MDEV-23328 Server hang due to Galera lock conflict resolution adaptation of 29bbcac0ee8 for 10.4 --- sql/sql_class.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'sql/sql_class.cc') diff --git a/sql/sql_class.cc b/sql/sql_class.cc index dc3903661e1..81718595fec 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -799,6 +799,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST); mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST); mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0); + mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data); /* Variables with default values */ proc_info="login"; @@ -5058,11 +5059,13 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd) #ifdef WITH_WSREP /* wsrep applier, replayer and TOI processing threads are ordered by replication provider, relaxed GAP locking protocol can be used - between high priority wsrep threads + between high priority wsrep threads. + Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd, + the caller should guarantee that the BF state won't change. + (e.g. InnoDB does it by keeping lock_sys.mutex locked) */ - if (WSREP_ON && - wsrep_thd_is_BF(const_cast(thd), false) && - wsrep_thd_is_BF(const_cast(other_thd), true)) + if (WSREP_ON && wsrep_thd_is_BF(thd, false) && + wsrep_thd_is_BF(other_thd, false)) return 0; #endif /* WITH_WSREP */ rgi= thd->rgi_slave; -- cgit v1.2.1