summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jan Lindström <jan.lindstrom@mariadb.com> 2021-10-21 13:48:59 +0300
committer Jan Lindström <jan.lindstrom@mariadb.com> 2021-10-29 10:00:05 +0300
commit 30337addfc34c882fc6772aa3d820e0ffb52e3b9 (patch)
tree f32bb143600cb86988f034c388e4ada82ee83f6d
parent 2ddea602ce18054ad5b6130a692b509506d2bde7 (diff)
download mariadb-git-30337addfc34c882fc6772aa3d820e0ffb52e3b9.tar.gz
MDEV-25114: Crash: WSREP: invalid state ROLLED_BACK (FATAL)
Revert "MDEV-23328 Server hang due to Galera lock conflict resolution"

This reverts commit 29bbcac0ee841faaa68eeb09c86ff825eabbe6b6.
-rw-r--r-- sql/handler.cc 2
-rw-r--r-- sql/wsrep_mysqld.cc 4
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 179
3 files changed, 74 insertions, 111 deletions
diff --git a/sql/handler.cc b/sql/handler.cc
index bbd0f3bf515..cf80bfb6249 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -835,11 +835,9 @@ static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
{
handlerton *hton= plugin_hton(plugin);
- mysql_mutex_lock(&thd->LOCK_thd_data);
if (hton->state == SHOW_OPTION_YES && hton->kill_query &&
thd_get_ha_data(thd, hton))
hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
- mysql_mutex_unlock(&thd->LOCK_thd_data);
return FALSE;
}
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index ce798a918e3..9f152d2a20c 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2747,7 +2747,9 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
{
if (signal)
{
- thd->awake_no_mutex(KILL_QUERY);
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ thd->awake(KILL_QUERY);
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
}
else
{
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 32636dbb41b..466a78890a0 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -60,7 +60,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <my_service_manager.h>
#include <key.h>
-#include <sql_manager.h>
/* Include necessary InnoDB headers */
#include "btr0btr.h"
@@ -18795,64 +18794,60 @@ wsrep_abort_slave_trx(
(long long)bf_seqno, (long long)victim_seqno);
abort();
}
-
-struct bg_wsrep_kill_trx_arg {
- my_thread_id thd_id;
- trx_id_t trx_id;
- int64_t bf_seqno;
- ibool signal;
-};
-
-static void bg_wsrep_kill_trx(
- void *void_arg)
+/*******************************************************************//**
+This function is used to kill one transaction in BF. */
+UNIV_INTERN
+void
+wsrep_innobase_kill_one_trx(
+/*========================*/
+ MYSQL_THD const bf_thd,
+ const trx_t * const bf_trx,
+ trx_t *victim_trx,
+ ibool signal)
{
- bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)void_arg;
- THD *thd = find_thread_by_id(arg->thd_id, false);
- trx_t *victim_trx = NULL;
- bool awake = false;
- DBUG_ENTER("bg_wsrep_kill_trx");
+ ut_ad(bf_thd);
+ ut_ad(victim_trx);
+ ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(victim_trx));
- if (thd) {
- wsrep_thd_LOCK(thd);
- victim_trx= thd_to_trx(thd);
- /* Victim trx might not exist e.g. on MDL-conflict. */
- if (victim_trx) {
- lock_mutex_enter();
- trx_mutex_enter(victim_trx);
- if (victim_trx->id != arg->trx_id ||
- victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
- {
- /* Victim was meanwhile rolled back or
- committed */
- lock_mutex_exit();
- trx_mutex_exit(victim_trx);
- goto no_victim;
- }
- } else {
-no_victim:
- wsrep_thd_UNLOCK(thd);
- /* find_thread_by_id() acquired THD::LOCK_kill_data */
- wsrep_thd_kill_UNLOCK(thd);
- goto ret;
- }
- wsrep_thd_UNLOCK(thd);
+ DBUG_ENTER("wsrep_innobase_kill_one_trx");
+ THD *thd = (THD *) victim_trx->mysql_thd;
+ int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd);
+ if (!thd) {
+ DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
+ WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
+ DBUG_VOID_RETURN;
}
+ WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
+
WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
"), victim: (%lu) trx: " TRX_ID_FMT,
- arg->signal, arg->bf_seqno,
+ signal, bf_seqno,
thd_get_thread_id(thd),
victim_trx->id);
WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64,
- (wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
+ (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
wsrep_thd_conflict_state(thd, FALSE),
wsrep_thd_ws_handle(thd)->trx_id);
+ wsrep_thd_LOCK(thd);
+ DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
+ {
+ const char act[]=
+ "now "
+ "wait_for signal.wsrep_after_BF_victim_lock";
+ DBUG_ASSERT(!debug_sync_set_action(bf_thd,
+ STRING_WITH_LEN(act)));
+ };);
+
+
if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
victim_trx->id);
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+ DBUG_VOID_RETURN;
}
if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
@@ -18868,13 +18863,18 @@ no_victim:
case MUST_ABORT:
WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
victim_trx->id);
- goto ret_awake;
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ DBUG_VOID_RETURN;
+ break;
case ABORTED:
case ABORTING: // fall through
default:
WSREP_DEBUG("victim " TRX_ID_FMT " in state %d",
victim_trx->id, wsrep_thd_get_conflict_state(thd));
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+ DBUG_VOID_RETURN;
+ break;
}
switch (wsrep_thd_query_state(thd)) {
@@ -18887,12 +18887,12 @@ no_victim:
victim_trx->id);
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
- wsrep_abort_slave_trx(arg->bf_seqno,
+ wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
} else {
wsrep_t *wsrep= get_wsrep();
rcode = wsrep->abort_pre_commit(
- wsrep, arg->bf_seqno,
+ wsrep, bf_seqno,
(wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id
);
@@ -18901,7 +18901,10 @@ no_victim:
WSREP_DEBUG("cancel commit warning: "
TRX_ID_FMT,
victim_trx->id);
- goto ret_awake;
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ DBUG_VOID_RETURN;
+ break;
case WSREP_OK:
break;
default:
@@ -18914,9 +18917,12 @@ no_victim:
* kill the lock holder first.
*/
abort();
+ break;
}
}
- goto ret_awake;
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ break;
case QUERY_EXEC:
/* it is possible that victim trx is itself waiting for some
* other lock. We need to cancel this waiting
@@ -18937,20 +18943,26 @@ no_victim:
lock_cancel_waiting_and_release(wait_lock);
}
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
} else {
/* abort currently executing query */
DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu",
thd_get_thread_id(thd)));
WSREP_DEBUG("kill query for: %ld",
thd_get_thread_id(thd));
+ /* Note that innobase_kill_query will take lock_mutex
+ and trx_mutex */
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
/* for BF thd, we need to prevent him from committing */
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
- wsrep_abort_slave_trx(arg->bf_seqno,
+ wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
}
}
- goto ret_awake;
+ break;
case QUERY_IDLE:
{
WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);
@@ -18958,9 +18970,10 @@ no_victim:
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
WSREP_DEBUG("kill BF IDLE, seqno: %lld",
(long long)wsrep_thd_trx_seqno(thd));
- wsrep_abort_slave_trx(arg->bf_seqno,
+ wsrep_thd_UNLOCK(thd);
+ wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
- goto ret_unlock;
+ DBUG_VOID_RETURN;
}
/* This will lock thd from proceeding after net_read() */
wsrep_thd_set_conflict_state(thd, ABORTING);
@@ -18981,67 +18994,17 @@ no_victim:
DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
WSREP_DEBUG("signaling aborter");
wsrep_unlock_rollback();
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+
+ break;
}
default:
WSREP_WARN("bad wsrep query state: %d",
wsrep_thd_query_state(thd));
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+ break;
}
-ret_awake:
- awake= true;
-
-ret_unlock:
- trx_mutex_exit(victim_trx);
- lock_mutex_exit();
- if (awake)
- wsrep_thd_awake(thd, arg->signal);
- wsrep_thd_kill_UNLOCK(thd);
-
-ret:
- free(arg);
- DBUG_VOID_RETURN;
-
-}
-
-/*******************************************************************//**
-This function is used to kill one transaction in BF. */
-UNIV_INTERN
-void
-wsrep_innobase_kill_one_trx(
-/*========================*/
- MYSQL_THD const bf_thd,
- const trx_t * const bf_trx,
- trx_t *victim_trx,
- ibool signal)
-{
- ut_ad(bf_thd);
- ut_ad(victim_trx);
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(victim_trx));
-
- bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
- arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
- arg->trx_id = victim_trx->id;
- arg->bf_seqno = wsrep_thd_trx_seqno((THD*)bf_thd);
- arg->signal = signal;
-
- DBUG_ENTER("wsrep_innobase_kill_one_trx");
-
- WSREP_LOG_CONFLICT(bf_thd, victim_trx->mysql_thd, TRUE);
-
- DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
- {
- const char act[]=
- "now "
- "wait_for signal.wsrep_after_BF_victim_lock";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
-
-
- mysql_manager_submit(bg_wsrep_kill_trx, arg);
DBUG_VOID_RETURN;
}