summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Lindström <jan.lindstrom@mariadb.com>2021-10-21 13:48:59 +0300
committerJan Lindström <jan.lindstrom@mariadb.com>2021-10-29 07:57:03 +0300
commitc8b39f7ee2a9910fae2f03563b3ea337c45132c2 (patch)
tree5c571661b1d08479fd377ca7007ee33dfd68de50
parentb3cdf4168cbfd4b20db01b8296f5800529f5713f (diff)
downloadmariadb-git-c8b39f7ee2a9910fae2f03563b3ea337c45132c2.tar.gz
MDEV-25114: Crash: WSREP: invalid state ROLLED_BACK (FATAL)
Revert "MDEV-23328 Server hang due to Galera lock conflict resolution" This reverts commit 29bbcac0ee841faaa68eeb09c86ff825eabbe6b6.
-rw-r--r--sql/wsrep_mysqld.cc2
-rw-r--r--storage/innobase/handler/ha_innodb.cc184
2 files changed, 74 insertions, 112 deletions
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index f22d8bf0f5a..e60100e2e90 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2762,7 +2762,9 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
{
if (signal)
{
+ mysql_mutex_lock(&thd->LOCK_thd_data);
thd->awake(KILL_QUERY);
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
}
else
{
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 37748cb497a..57e7ec236c0 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -60,7 +60,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <my_service_manager.h>
#include <key.h>
-#include <sql_manager.h>
/* Include necessary InnoDB headers */
#include "btr0btr.h"
@@ -19513,68 +19512,61 @@ wsrep_abort_slave_trx(
(long long)bf_seqno, (long long)victim_seqno);
abort();
}
-
-struct bg_wsrep_kill_trx_arg {
- my_thread_id thd_id;
- trx_id_t trx_id;
- int64_t bf_seqno;
- ibool signal;
-};
-
-static void bg_wsrep_kill_trx(
- void *void_arg)
+/*******************************************************************//**
+This function is used to kill one transaction in BF. */
+UNIV_INTERN
+void
+wsrep_innobase_kill_one_trx(
+/*========================*/
+ MYSQL_THD const bf_thd,
+ const trx_t * const bf_trx,
+ trx_t *victim_trx,
+ ibool signal)
{
- bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)void_arg;
- THD *thd = find_thread_by_id(arg->thd_id, false);
- trx_t *victim_trx = NULL;
- bool awake = false;
- DBUG_ENTER("bg_wsrep_kill_trx");
+ ut_ad(bf_thd);
+ ut_ad(victim_trx);
+ ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(victim_trx));
- if (thd) {
- victim_trx= thd_to_trx(thd);
- /* Victim trx might not exist e.g. on MDL-conflict. */
- if (victim_trx) {
- lock_mutex_enter();
- trx_mutex_enter(victim_trx);
- if (victim_trx->id != arg->trx_id ||
- victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
- {
- /* Victim was meanwhile rolled back or
- committed */
- trx_mutex_exit(victim_trx);
- lock_mutex_exit();
- wsrep_thd_UNLOCK(thd);
- victim_trx= NULL;
- }
- } else {
- /* find_thread_by_id locked
- THD::LOCK_thd_data */
- wsrep_thd_UNLOCK(thd);
- }
- }
+ DBUG_ENTER("wsrep_innobase_kill_one_trx");
+ THD *thd = (THD *) victim_trx->mysql_thd;
+ int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd);
- if (!victim_trx) {
- /* Victim trx might not exist (MDL-conflict) or victim
- was meanwhile rolled back or committed because of
- a KILL statement or a disconnect. */
- goto ret;
+ if (!thd) {
+ DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
+ WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
+ DBUG_VOID_RETURN;
}
+ WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
+
WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
"), victim: (%lu) trx: " TRX_ID_FMT,
- arg->signal, arg->bf_seqno,
+ signal, bf_seqno,
thd_get_thread_id(thd),
victim_trx->id);
WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64,
- (wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
+ (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
wsrep_thd_conflict_state(thd, FALSE),
wsrep_thd_ws_handle(thd)->trx_id);
+ wsrep_thd_LOCK(thd);
+ DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
+ {
+ const char act[]=
+ "now "
+ "wait_for signal.wsrep_after_BF_victim_lock";
+ DBUG_ASSERT(!debug_sync_set_action(bf_thd,
+ STRING_WITH_LEN(act)));
+ };);
+
+
if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
victim_trx->id);
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+ DBUG_VOID_RETURN;
}
if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
@@ -19590,13 +19582,18 @@ static void bg_wsrep_kill_trx(
case MUST_ABORT:
WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
victim_trx->id);
- goto ret_awake;
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ DBUG_VOID_RETURN;
+ break;
case ABORTED:
case ABORTING: // fall through
default:
WSREP_DEBUG("victim " TRX_ID_FMT " in state %d",
victim_trx->id, wsrep_thd_get_conflict_state(thd));
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+ DBUG_VOID_RETURN;
+ break;
}
switch (wsrep_thd_query_state(thd)) {
@@ -19609,12 +19606,12 @@ static void bg_wsrep_kill_trx(
victim_trx->id);
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
- wsrep_abort_slave_trx(arg->bf_seqno,
+ wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
} else {
wsrep_t *wsrep= get_wsrep();
rcode = wsrep->abort_pre_commit(
- wsrep, arg->bf_seqno,
+ wsrep, bf_seqno,
(wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id
);
@@ -19623,7 +19620,10 @@ static void bg_wsrep_kill_trx(
WSREP_DEBUG("cancel commit warning: "
TRX_ID_FMT,
victim_trx->id);
- goto ret_awake;
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ DBUG_VOID_RETURN;
+ break;
case WSREP_OK:
break;
default:
@@ -19636,9 +19636,12 @@ static void bg_wsrep_kill_trx(
* kill the lock holder first.
*/
abort();
+ break;
}
}
- goto ret_awake;
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
+ break;
case QUERY_EXEC:
/* it is possible that victim trx is itself waiting for some
* other lock. We need to cancel this waiting
@@ -19659,20 +19662,26 @@ static void bg_wsrep_kill_trx(
lock_cancel_waiting_and_release(wait_lock);
}
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
} else {
/* abort currently executing query */
DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu",
thd_get_thread_id(thd)));
WSREP_DEBUG("kill query for: %ld",
thd_get_thread_id(thd));
+ /* Note that innobase_kill_query will take lock_mutex
+ and trx_mutex */
+ wsrep_thd_UNLOCK(thd);
+ wsrep_thd_awake(thd, signal);
/* for BF thd, we need to prevent him from committing */
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
- wsrep_abort_slave_trx(arg->bf_seqno,
+ wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
}
}
- goto ret_awake;
+ break;
case QUERY_IDLE:
{
WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);
@@ -19680,9 +19689,10 @@ static void bg_wsrep_kill_trx(
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
WSREP_DEBUG("kill BF IDLE, seqno: %lld",
(long long)wsrep_thd_trx_seqno(thd));
- wsrep_abort_slave_trx(arg->bf_seqno,
+ wsrep_thd_UNLOCK(thd);
+ wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
- goto ret_unlock;
+ DBUG_VOID_RETURN;
}
/* This will lock thd from proceeding after net_read() */
wsrep_thd_set_conflict_state(thd, ABORTING);
@@ -19703,67 +19713,17 @@ static void bg_wsrep_kill_trx(
DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
WSREP_DEBUG("signaling aborter");
wsrep_unlock_rollback();
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+
+ break;
}
default:
WSREP_WARN("bad wsrep query state: %d",
wsrep_thd_query_state(thd));
- goto ret_unlock;
+ wsrep_thd_UNLOCK(thd);
+ break;
}
-ret_awake:
- awake= true;
-
-ret_unlock:
- trx_mutex_exit(victim_trx);
- lock_mutex_exit();
- if (awake)
- wsrep_thd_awake(thd, arg->signal);
- wsrep_thd_UNLOCK(thd);
-
-ret:
- free(arg);
- DBUG_VOID_RETURN;
-
-}
-
-/*******************************************************************//**
-This function is used to kill one transaction in BF. */
-UNIV_INTERN
-void
-wsrep_innobase_kill_one_trx(
-/*========================*/
- MYSQL_THD const bf_thd,
- const trx_t * const bf_trx,
- trx_t *victim_trx,
- ibool signal)
-{
- ut_ad(bf_thd);
- ut_ad(victim_trx);
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(victim_trx));
-
- bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
- arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
- arg->trx_id = victim_trx->id;
- arg->bf_seqno = wsrep_thd_trx_seqno((THD*)bf_thd);
- arg->signal = signal;
-
- DBUG_ENTER("wsrep_innobase_kill_one_trx");
-
- WSREP_LOG_CONFLICT(bf_thd, victim_trx->mysql_thd, TRUE);
-
- DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
- {
- const char act[]=
- "now "
- "wait_for signal.wsrep_after_BF_victim_lock";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
-
-
- mysql_manager_submit(bg_wsrep_kill_trx, arg);
DBUG_VOID_RETURN;
}
@@ -19798,8 +19758,8 @@ wsrep_abort_transaction(
WSREP_DEBUG("victim does not have transaction");
wsrep_thd_LOCK(victim_thd);
wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
- wsrep_thd_awake(victim_thd, signal);
wsrep_thd_UNLOCK(victim_thd);
+ wsrep_thd_awake(victim_thd, signal);
}
DBUG_VOID_RETURN;