summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorSergei Golubchik <serg@mariadb.org>2021-02-07 17:48:58 +0100
committerSergei Golubchik <serg@mariadb.org>2021-02-12 18:17:06 +0100
commiteac8341df4c3c7b98360f4e9498acf393dc055e3 (patch)
tree6e68141971047c34568b42451f1510ba906d4497 /sql
parent9703cffa8cb57e2fe29719f4aae3282bfae82878 (diff)
downloadmariadb-git-eac8341df4c3c7b98360f4e9498acf393dc055e3.tar.gz
MDEV-23328 Server hang due to Galera lock conflict resolution
adaptation of 29bbcac0ee8 for 10.4
Diffstat (limited to 'sql')
-rw-r--r--sql/log_event.cc22
-rw-r--r--sql/service_wsrep.cc16
-rw-r--r--sql/slave.cc2
-rw-r--r--sql/sql_class.cc11
-rw-r--r--sql/sql_class.h4
-rw-r--r--sql/sql_parse.cc4
-rw-r--r--sql/sql_repl.cc2
-rw-r--r--sql/wsrep_client_service.cc7
-rw-r--r--sql/wsrep_server_service.cc9
9 files changed, 36 insertions, 41 deletions
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 89ffebf2659..337de3508ed 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -8983,8 +8983,20 @@ err:
}
#endif /* MYSQL_CLIENT */
-
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
+static bool wsrep_must_replay(THD *thd)
+{
+#ifdef WITH_WSREP
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ bool res= WSREP(thd) && thd->wsrep_trx().state() == wsrep::transaction::s_must_replay;
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+ return res;
+#else
+ return false;
+#endif
+}
+
+
int Xid_log_event::do_apply_event(rpl_group_info *rgi)
{
bool res;
@@ -9049,14 +9061,8 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
res= trans_commit(thd); /* Automatically rolls back on error. */
thd->release_transactional_locks();
- mysql_mutex_lock(&thd->LOCK_thd_data);
-#ifdef WITH_WSREP
- if (sub_id && (!res || (WSREP(thd) && thd->wsrep_trx().state() == wsrep::transaction::s_must_replay)))
-#else
- if (sub_id && !res)
-#endif /* WITH_WSREP */
+ if (sub_id && (!res || wsrep_must_replay(thd)))
rpl_global_gtid_slave_state->update_state_hash(sub_id, &gtid, hton, rgi);
- mysql_mutex_unlock(&thd->LOCK_thd_data);
/*
Increment the global status commit count variable
*/
diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc
index f0a4cf81c02..80f164855b2 100644
--- a/sql/service_wsrep.cc
+++ b/sql/service_wsrep.cc
@@ -210,16 +210,8 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
my_bool signal)
{
- DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
- {
- const char act[]=
- "now "
- "SIGNAL sync.before_wsrep_thd_abort_reached "
- "WAIT_FOR signal.before_wsrep_thd_abort";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
-
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
+ mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
my_bool ret= wsrep_bf_abort(bf_thd, victim_thd);
/*
Send awake signal if victim was BF aborted or does not
@@ -228,8 +220,6 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
*/
if ((ret || !wsrep_on(victim_thd)) && signal)
{
- mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
- mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill);
mysql_mutex_lock(&victim_thd->LOCK_thd_data);
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
@@ -240,10 +230,8 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
return false;
}
- mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
victim_thd->wsrep_aborter= bf_thd->thread_id;
victim_thd->awake_no_mutex(KILL_QUERY);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
} else {
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
diff --git a/sql/slave.cc b/sql/slave.cc
index 372e46acd1d..31bd9372a14 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1069,8 +1069,8 @@ terminate_slave_thread(THD *thd,
int error __attribute__((unused));
DBUG_PRINT("loop", ("killing slave thread"));
- mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill);
+ mysql_mutex_lock(&thd->LOCK_thd_data);
#ifndef DONT_USE_THR_ALARM
/*
Error codes from pthread_kill are:
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index dc3903661e1..81718595fec 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -799,6 +799,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0);
+ mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data);
/* Variables with default values */
proc_info="login";
@@ -5058,11 +5059,13 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
#ifdef WITH_WSREP
/* wsrep applier, replayer and TOI processing threads are ordered
by replication provider, relaxed GAP locking protocol can be used
- between high priority wsrep threads
+ between high priority wsrep threads.
+ Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd,
+ the caller should guarantee that the BF state won't change.
+ (e.g. InnoDB does it by keeping lock_sys.mutex locked)
*/
- if (WSREP_ON &&
- wsrep_thd_is_BF(const_cast<THD *>(thd), false) &&
- wsrep_thd_is_BF(const_cast<THD *>(other_thd), true))
+ if (WSREP_ON && wsrep_thd_is_BF(thd, false) &&
+ wsrep_thd_is_BF(other_thd, false))
return 0;
#endif /* WITH_WSREP */
rgi= thd->rgi_slave;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 39d6ec1027f..4eabd3da450 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3309,11 +3309,11 @@ public:
void awake_no_mutex(killed_state state_to_set);
void awake(killed_state state_to_set)
{
- mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_lock(&LOCK_thd_kill);
+ mysql_mutex_lock(&LOCK_thd_data);
awake_no_mutex(state_to_set);
- mysql_mutex_unlock(&LOCK_thd_kill);
mysql_mutex_unlock(&LOCK_thd_data);
+ mysql_mutex_unlock(&LOCK_thd_kill);
}
void abort_current_cond_wait(bool force);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index dd0e5cfa34e..d71d29bc85a 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -9102,8 +9102,8 @@ static my_bool find_thread_with_thd_data_lock_callback(THD *thd, find_thread_cal
{
if (arg->id == (arg->query_id ? thd->query_id : (longlong) thd->thread_id))
{
- mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
+ mysql_mutex_lock(&thd->LOCK_thd_data); // XXX DELME
arg->thd= thd;
return 1;
}
@@ -9238,8 +9238,8 @@ static my_bool kill_threads_callback(THD *thd, kill_threads_callback_arg *arg)
return 1;
if (!arg->threads_to_kill.push_back(thd, arg->thd->mem_root))
{
- mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
+ mysql_mutex_lock(&thd->LOCK_thd_data);
}
}
}
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index a0d8b4ca6d1..6a6cfb2aa5f 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -3474,8 +3474,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg)
thd->variables.server_id == arg->slave_server_id)
{
arg->thd= thd;
- mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
+ mysql_mutex_lock(&thd->LOCK_thd_data);
return 1;
}
return 0;
diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc
index 245fc1487ca..89621619a23 100644
--- a/sql/wsrep_client_service.cc
+++ b/sql/wsrep_client_service.cc
@@ -69,20 +69,13 @@ bool Wsrep_client_service::interrupted(
wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const
{
DBUG_ASSERT(m_thd == current_thd);
- /* Underlying mutex in lock object points to LOCK_thd_data, which
- protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed.
- Locking order is:
- 1) LOCK_thd_data
- 2) LOCK_thd_kill */
mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native()));
- mysql_mutex_lock(&m_thd->LOCK_thd_kill);
bool ret= (m_thd->killed != NOT_KILLED);
if (ret)
{
WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d",
m_thd->killed, m_thd->wsrep_trx().state());
}
- mysql_mutex_unlock(&m_thd->LOCK_thd_kill);
return ret;
}
diff --git a/sql/wsrep_server_service.cc b/sql/wsrep_server_service.cc
index cd432ab3eae..19259a43925 100644
--- a/sql/wsrep_server_service.cc
+++ b/sql/wsrep_server_service.cc
@@ -40,6 +40,7 @@ static void init_service_thd(THD* thd, char* thread_stack)
thd->prior_thr_create_utime= thd->start_utime= microsecond_interval_timer();
thd->set_command(COM_SLEEP);
thd->reset_for_next_command(true);
+ server_threads.insert(thd); // as wsrep_innobase_kill_one_trx() uses find_thread_by_id()
}
Wsrep_storage_service*
@@ -79,6 +80,7 @@ void Wsrep_server_service::release_storage_service(
static_cast<Wsrep_storage_service*>(storage_service);
THD* thd= ss->m_thd;
wsrep_reset_threadvars(thd);
+ server_threads.erase(thd);
delete ss;
delete thd;
}
@@ -92,7 +94,8 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx)
streaming transaction is BF aborted and streaming applier
is created from BF aborter context. */
Wsrep_threadvars saved_threadvars(wsrep_save_threadvars());
- wsrep_reset_threadvars(saved_threadvars.cur_thd);
+ if (saved_threadvars.cur_thd)
+ wsrep_reset_threadvars(saved_threadvars.cur_thd);
THD *thd= 0;
Wsrep_applier_service *ret= 0;
if (!wsrep_create_threadvars() &&
@@ -109,7 +112,8 @@ wsrep_create_streaming_applier(THD *orig_thd, const char *ctx)
}
/* Restore original thread local storage state before returning. */
wsrep_restore_threadvars(saved_threadvars);
- wsrep_store_threadvars(saved_threadvars.cur_thd);
+ if (saved_threadvars.cur_thd)
+ wsrep_store_threadvars(saved_threadvars.cur_thd);
return ret;
}
@@ -138,6 +142,7 @@ void Wsrep_server_service::release_high_priority_service(wsrep::high_priority_se
THD* thd= hps->m_thd;
delete hps;
wsrep_store_threadvars(thd);
+ server_threads.erase(thd);
delete thd;
wsrep_delete_threadvars();
}