summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Lindström <jan.lindstrom@mariadb.com>2020-12-17 14:20:23 +0200
committerJan Lindström <jan.lindstrom@mariadb.com>2020-12-29 12:42:01 +0200
commitf35c2e65a7a3df5ee71b0a48474835337f843872 (patch)
tree32ba52a10559b4123588f6badc1145da860229e5
parente59c1cef3bc4016f9fa9d7a0f6935463b7283a58 (diff)
downloadmariadb-git-bb-10.3-MDEV-23536.tar.gz
MDEV-23536 : Race condition between KILL and transaction commitbb-10.3-MDEV-23536
A race condition may occur between the execution of transaction commit, and an execution of a KILL statement that would attempt to abort that transaction. MDEV-17092 worked around this race condition by modifying InnoDB code. After that issue was closed, Sergey Vojtovich pointed out that this race condition would better be fixed above the storage engine layer: If you look carefully into the above, you can conclude that thd->free_connection() can be called concurrently with KILL/thd->awake(). Which is the bug. And it is partially fixed in THD::~THD(), that is destructor waits for KILL completion: Fix: Add necessary mutex operations to THD::free_connection() and move WSREP specific code also there. This ensures that no one is using THD while we do free_connection(). These mutexes will also ensures that there can't be concurrent KILL/THD::awake(). innobase_kill_query We can now remove usage of trx_sys_mutex introduced on MDEV-17092. trx_t::free() Poison trx->state and trx->mysql_thd This patch is validated with an RQG run similar to the one that reproduced MDEV-17092.
-rw-r--r--sql/sql_class.cc37
-rw-r--r--sql/sql_class.h2
-rw-r--r--storage/innobase/handler/ha_innodb.cc26
-rw-r--r--storage/innobase/lock/lock0lock.cc1
-rw-r--r--storage/innobase/trx/trx0trx.cc4
5 files changed, 41 insertions, 29 deletions
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index a7057e3d5d4..275900396f1 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1458,12 +1458,15 @@ void THD::reset_db(const LEX_CSTRING *new_db)
/* Do operations that may take a long time */
-void THD::cleanup(void)
+void THD::cleanup(bool have_mutex)
{
DBUG_ENTER("THD::cleanup");
DBUG_ASSERT(cleanup_done == 0);
- set_killed(KILL_CONNECTION);
+ if (have_mutex)
+ set_killed_no_mutex(KILL_CONNECTION,0,0);
+ else
+ set_killed(KILL_CONNECTION);
#ifdef ENABLE_WHEN_BINLOG_WILL_BE_ABLE_TO_PREPARE
if (transaction.xid_state.xa_state == XA_PREPARED)
{
@@ -1541,8 +1544,31 @@ void THD::cleanup(void)
void THD::free_connection()
{
DBUG_ASSERT(free_connection_done == 0);
+ /* Check that we have already called thd->unlink() */
+ DBUG_ASSERT(prev == 0 && next == 0);
+
+ /*
+ Other threads may have a lock on THD::LOCK_thd_data or
+ THD::LOCK_thd_kill to ensure that this THD is not deleted
+ while they access it. The following mutex_lock ensures
+ that no one else is using this THD and it's now safe to
+ continue.
+
+ For example consider KILL-statement execution on
+ sql_parse.cc kill_one_thread() that will use
+ THD::LOCK_thd_data to protect victim thread during
+ THD::awake().
+ */
+ mysql_mutex_lock(&LOCK_thd_data);
+ mysql_mutex_lock(&LOCK_thd_kill);
+
+#ifdef WITH_WSREP
+ delete wsrep_rgi;
+ wsrep_rgi= 0;
+#endif /* WITH_WSREP */
my_free((char*) db.str);
db= null_clex_str;
+
#ifndef EMBEDDED_LIBRARY
if (net.vio)
vio_delete(net.vio);
@@ -1550,7 +1576,7 @@ void THD::free_connection()
net_end(&net);
#endif
if (!cleanup_done)
- cleanup();
+ cleanup(true); // We have locked THD::LOCK_thd_kill
ha_close_connection(this);
plugin_thdvar_cleanup(this);
mysql_audit_free_thd(this);
@@ -1561,6 +1587,8 @@ void THD::free_connection()
#if defined(ENABLED_PROFILING)
profiling.restart(); // Reset profiling
#endif
+ mysql_mutex_unlock(&LOCK_thd_kill);
+ mysql_mutex_unlock(&LOCK_thd_data);
}
/*
@@ -1620,9 +1648,6 @@ THD::~THD()
mysql_mutex_lock(&LOCK_thd_kill);
mysql_mutex_unlock(&LOCK_thd_kill);
-#ifdef WITH_WSREP
- delete wsrep_rgi;
-#endif
if (!free_connection_done)
free_connection();
diff --git a/sql/sql_class.h b/sql/sql_class.h
index a22102394b8..140394fefc1 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3291,7 +3291,7 @@ public:
void update_all_stats();
void update_stats(void);
void change_user(void);
- void cleanup(void);
+ void cleanup(bool have_mutex=false);
void cleanup_after_query();
void free_connection();
void reset_for_reuse();
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index c6ca287b8f4..848591234f4 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -5082,29 +5082,15 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
if (trx_t* trx= thd_to_trx(thd))
{
+ ut_ad(trx->mysql_thd == thd);
lock_mutex_enter();
- mutex_enter(&trx_sys.mutex);
- trx_mutex_enter(trx);
- /* It is possible that innobase_close_connection() is concurrently
- being executed on our victim. Even if the trx object is later
- reused for another client connection or a background transaction,
- its trx->mysql_thd will differ from our thd.
-
- trx_t::state changes are protected by trx_t::mutex, and
- trx_sys.trx_list is protected by trx_sys.mutex, in
- both trx_create() and trx_t::free().
-
- At this point, trx may have been reallocated for another client
- connection, or for a background operation. In that case, either
- trx_t::state or trx_t::mysql_thd should not match our expectations. */
- bool cancel= trx->mysql_thd == thd && trx->state == TRX_STATE_ACTIVE &&
- !trx->lock.was_chosen_as_deadlock_victim;
- mutex_exit(&trx_sys.mutex);
- if (!cancel);
- else if (lock_t *lock= trx->lock.wait_lock)
+ if (lock_t *lock= trx->lock.wait_lock)
+ {
+ trx_mutex_enter(trx);
lock_cancel_waiting_and_release(lock);
+ trx_mutex_exit(trx);
+ }
lock_mutex_exit();
- trx_mutex_exit(trx);
}
DBUG_VOID_RETURN;
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index f52d1810381..a633d440519 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -6153,6 +6153,7 @@ lock_cancel_waiting_and_release(
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(lock->trx));
+ ut_ad(lock->trx->state == TRX_STATE_ACTIVE);
lock->trx->lock.cancel = true;
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index c92af8c2b84..1511f3b5b77 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -410,7 +410,7 @@ void trx_t::free()
/* do not poison mutex */
MEM_NOACCESS(&id, sizeof id);
MEM_NOACCESS(&no, sizeof no);
- /* state is accessed by innobase_kill_connection() */
+ MEM_NOACCESS(&state, sizeof state);
MEM_NOACCESS(&is_recovered, sizeof is_recovered);
#ifdef WITH_WSREP
MEM_NOACCESS(&wsrep, sizeof wsrep);
@@ -435,7 +435,7 @@ void trx_t::free()
MEM_NOACCESS(&start_time_micro, sizeof start_time_micro);
MEM_NOACCESS(&commit_lsn, sizeof commit_lsn);
MEM_NOACCESS(&table_id, sizeof table_id);
- /* mysql_thd is accessed by innobase_kill_connection() */
+ MEM_NOACCESS(&mysql_thd, sizeof mysql_thd);
MEM_NOACCESS(&mysql_log_file_name, sizeof mysql_log_file_name);
MEM_NOACCESS(&mysql_log_offset, sizeof mysql_log_offset);
MEM_NOACCESS(&n_mysql_tables_in_use, sizeof n_mysql_tables_in_use);