summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsjaakola <seppo.jaakola@iki.fi>2021-10-21 14:49:51 +0300
committerJan Lindström <jan.lindstrom@mariadb.com>2021-10-28 12:00:41 +0300
commit2c8f52ea53d7532e395fa92c3b0e9c5dfb619403 (patch)
tree62c4fc3353e021dc3c5d0d1b6931b6473e9a67be
parent910fc72a7f14aaa8e37e9522518d8bff49e98c35 (diff)
downloadmariadb-git-bb-10.5-KILL-as-TOI-galera.tar.gz
MDEV-23328 Server hang due to Galera lock conflict resolution [bb-10.5-KILL-as-TOI-galera]
Mutex order violation when a wsrep BF thread kills a conflicting trx: the stack is wsrep_thd_LOCK() wsrep_kill_victim() lock_rec_other_has_conflicting() lock_clust_rec_read_check_and_lock() row_search_mvcc() ha_innobase::index_read() ha_innobase::rnd_pos() handler::ha_rnd_pos() handler::rnd_pos_by_record() handler::ha_rnd_pos_by_record() Rows_log_event::find_row() Update_rows_log_event::do_exec_row() Rows_log_event::do_apply_event() Log_event::apply_event() wsrep_apply_events(), and mutexes are taken in the order lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data. When a normal KILL statement is executed, the stack is innobase_kill_query() kill_handlerton() plugin_foreach_with_mask() ha_kill_query() THD::awake() kill_one_thread(), and mutexes are taken in the order victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex. This patch is the plan D variant for fixing the potential mutex locking order violation exercised by BF aborting and KILL command execution. In this approach, the KILL command is replicated as a TOI operation. This guarantees total isolation for the KILL command execution in the first node: there is no concurrent replication applying and no concurrent DDL executing. Therefore there is no risk of BF aborting happening in parallel with KILL command execution either. Potential mutex deadlocks between the different mutex access paths with KILL command execution and BF aborting therefore cannot happen. TOI replication is used, in this approach, purely as a means to provide isolated KILL command execution in the first node. The KILL command should not (and must not) be applied in secondary nodes. In this patch, we ensure this by skipping KILL execution in secondary nodes, in the applying phase, where we bail out if an applier thread is trying to execute a KILL command. This is effective, but skipping the applying of the KILL command could happen much earlier as well. This also fixes unprotected calls to wsrep_thd_abort that will use wsrep_abort_transaction. 
This is fixed by holding THD::LOCK_thd_data while we abort the transaction. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
-rw-r--r--mysql-test/suite/galera/r/galera_UK_conflict.result13
-rw-r--r--mysql-test/suite/galera/r/galera_bf_kill_debug.result54
-rw-r--r--mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result16
-rw-r--r--mysql-test/suite/galera/t/galera_UK_conflict.test23
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill_debug.cnf7
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill_debug.test140
-rw-r--r--mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test14
-rw-r--r--mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test2
-rw-r--r--sql/handler.cc2
-rw-r--r--sql/service_wsrep.cc34
-rw-r--r--sql/slave.cc2
-rw-r--r--sql/sql_class.cc30
-rw-r--r--sql/sql_class.h4
-rw-r--r--sql/sql_parse.cc49
-rw-r--r--sql/sql_repl.cc2
-rw-r--r--sql/wsrep_client_service.cc9
-rw-r--r--sql/wsrep_mysqld.cc43
-rw-r--r--sql/wsrep_thd.cc30
-rw-r--r--storage/innobase/handler/ha_innodb.cc223
-rw-r--r--storage/innobase/include/ha_prototypes.h6
-rw-r--r--storage/innobase/lock/lock0wait.cc23
21 files changed, 317 insertions, 409 deletions
diff --git a/mysql-test/suite/galera/r/galera_UK_conflict.result b/mysql-test/suite/galera/r/galera_UK_conflict.result
index 44bb64c9d63..cc7e17d7c58 100644
--- a/mysql-test/suite/galera/r/galera_UK_conflict.result
+++ b/mysql-test/suite/galera/r/galera_UK_conflict.result
@@ -68,9 +68,9 @@ f1 f2 f3
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
-DROP TABLE t1;
-test scenario 2
-connection node_1;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0);
@@ -92,9 +92,9 @@ SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1;
-COMMIT;
-connection node_1a;
-SET SESSION wsrep_on = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb";
@@ -125,6 +125,7 @@ f1 f2 f3
3 3 1
4 4 2
5 5 2
+8 8 8
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
diff --git a/mysql-test/suite/galera/r/galera_bf_kill_debug.result b/mysql-test/suite/galera/r/galera_bf_kill_debug.result
deleted file mode 100644
index c3eae243f47..00000000000
--- a/mysql-test/suite/galera/r/galera_bf_kill_debug.result
+++ /dev/null
@@ -1,54 +0,0 @@
-connection node_2;
-connection node_1;
-connection node_2;
-CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
-insert into t1 values (NULL,1);
-connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2a;
-truncate t1;
-insert into t1 values (1,0);
-begin;
-update t1 set b=2 where a=1;
-connection node_2;
-set session wsrep_sync_wait=0;
-connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2b;
-SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
-connection node_1;
-select * from t1;
-a b
-1 0
-update t1 set b= 1 where a=1;
-connection node_2b;
-SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
-connection node_2;
-SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
-connection node_2b;
-SET DEBUG_SYNC='now WAIT_FOR awake_reached';
-SET GLOBAL debug_dbug = "";
-SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
-SET DEBUG_SYNC = "now SIGNAL continue_kill";
-connection node_2;
-connection node_2a;
-select * from t1;
-connection node_2;
-SET DEBUG_SYNC = "RESET";
-drop table t1;
-disconnect node_2a;
-connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2a;
-CREATE TABLE t1 (i int primary key);
-SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
-INSERT INTO t1 VALUES (1);
-connection node_2;
-SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
-SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
-SET DEBUG_SYNC='RESET';
-connection node_2a;
-connection node_2;
-select * from t1;
-i
-1
-disconnect node_2a;
-connection node_1;
-drop table t1;
diff --git a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
index 6e55c59ad15..2493075b635 100644
--- a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
+++ b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
@@ -23,22 +23,6 @@ connection node_1a;
connection node_1b;
connection node_2;
connection node_2a;
-connection node_1;
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-COUNT(*)
-20001
-SELECT COUNT(*) FROM child;
-COUNT(*)
-10000
-connection node_2;
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-COUNT(*)
-20001
-SELECT COUNT(*) FROM child;
-COUNT(*)
-10000
DROP TABLE child;
DROP TABLE parent;
DROP TABLE ten;
diff --git a/mysql-test/suite/galera/t/galera_UK_conflict.test b/mysql-test/suite/galera/t/galera_UK_conflict.test
index 9978ba9b8bf..25f414a5764 100644
--- a/mysql-test/suite/galera/t/galera_UK_conflict.test
+++ b/mysql-test/suite/galera/t/galera_UK_conflict.test
@@ -140,6 +140,14 @@ SELECT * FROM t1;
# original state in node 1
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
+
+--connection node_1
+--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
+--source include/wait_condition.inc
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
DROP TABLE t1;
@@ -199,9 +207,9 @@ INSERT INTO t1 VALUES (5, 5, 2);
--source include/galera_set_sync_point.inc
--connection node_1
---send COMMIT
-
---connection node_1a
+--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
+--source include/wait_condition.inc
+SELECT COUNT(*) FROM t1;
# wait for the local commit to enter in commit monitor wait state
--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
@@ -273,4 +281,13 @@ SELECT * FROM t1;
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
+
+--connection node_1
+--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
+--source include/wait_condition.inc
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
+
DROP TABLE t1;
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf b/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
deleted file mode 100644
index e68f891792c..00000000000
--- a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
+++ /dev/null
@@ -1,7 +0,0 @@
-!include ../galera_2nodes.cnf
-
-[mysqld.1]
-wsrep-debug=SERVER
-
-[mysqld.2]
-wsrep-debug=SERVER
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test
deleted file mode 100644
index b687a5a6a67..00000000000
--- a/mysql-test/suite/galera/t/galera_bf_kill_debug.test
+++ /dev/null
@@ -1,140 +0,0 @@
---source include/galera_cluster.inc
---source include/have_innodb.inc
---source include/have_debug.inc
---source include/have_debug_sync.inc
-
-#
-# Test case 7:
-# 1. Start a transaction on node_2,
-# and leave it pending while holding a row locked
-# 2. set sync point pause applier
-# 3. send a conflicting write on node_1, it will pause
-# at the sync point
-# 4. though another connection to node_2, kill the local
-# transaction
-#
-
---connection node_2
-CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
-insert into t1 values (NULL,1);
-
-#
-# connection node_2a runs a local transaction, that is victim of BF abort
-# and victim of KILL command by connection node_2
-#
---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2a
-truncate t1;
-insert into t1 values (1,0);
-
-# start a transaction that will conflict with later applier
-begin;
-update t1 set b=2 where a=1;
-
---connection node_2
-set session wsrep_sync_wait=0;
---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
---source include/wait_condition.inc
-
---let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
-
-# connection node_2b is for controlling debug syn points
-# first set a sync point for applier, to pause during BF aborting
-# and before THD::awake would be called
-#
---connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2b
-SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
-
-#
-# replicate an update, which will BF abort the victim node_2a
-# however, while applier in node 2 is handling the abort,
-# it will pause in sync point set by node_2b
-#
---connection node_1
-select * from t1;
-update t1 set b= 1 where a=1;
-
-#
-# wait until the applying of above update has reached the sync point
-# in node 2
-#
---connection node_2b
-SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
-
---connection node_2
-#
-# pause KILL execution before awake
-#
-SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
---disable_query_log
---send_eval KILL $k_thread
---enable_query_log
-
-
---connection node_2b
-SET DEBUG_SYNC='now WAIT_FOR awake_reached';
-
-# release applier and KILL operator
-SET GLOBAL debug_dbug = "";
-SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
-SET DEBUG_SYNC = "now SIGNAL continue_kill";
-
---connection node_2
---reap
-
---connection node_2a
---error 0,1213
-select * from t1;
-
---connection node_2
-SET DEBUG_SYNC = "RESET";
-
-drop table t1;
-
---disconnect node_2a
-#
-# Test case 7:
-# run a transaction in node 2, and set a sync point to pause the transaction
-# in commit phase.
-# Through another connection to node 2, kill the committing transaction by
-# KILL QUERY command
-#
-
---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2a
---let $connection_id = `SELECT CONNECTION_ID()`
-
-CREATE TABLE t1 (i int primary key);
-
-# Set up sync point
-SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
-
-# Send insert which will block in the sync point above
---send INSERT INTO t1 VALUES (1)
-
---connection node_2
-SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
-
---disable_query_log
---disable_result_log
-# victim has passed the point of no return, kill is not possible anymore
---eval KILL QUERY $connection_id
---enable_result_log
---enable_query_log
-
-SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
-SET DEBUG_SYNC='RESET';
---connection node_2a
---error 0,1213
---reap
-
---connection node_2
-# victim was able to complete the INSERT
-select * from t1;
-
---disconnect node_2a
-
---connection node_1
-drop table t1;
-
diff --git a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
index fadc94d78ff..3b4b427f551 100644
--- a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
+++ b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
@@ -54,15 +54,11 @@ INSERT INTO parent VALUES (1, 0);
--connection node_2a
--reap
---connection node_1
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-SELECT COUNT(*) FROM child;
-
---connection node_2
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-SELECT COUNT(*) FROM child;
+#
+# ALTER TABLE could bf kill one or more of INSERTs to parent, so
+# the actual number of rows in PARENT depends on whether
+# the INSERT is committed before ALTER TABLE is executed
+#
DROP TABLE child;
DROP TABLE parent;
diff --git a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test
index c0bbe5af8cf..241b62dbf8c 100644
--- a/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test
+++ b/mysql-test/suite/galera/t/galera_var_auto_inc_control_off.test
@@ -94,11 +94,13 @@ SELECT * FROM t1;
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node1
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node1
+--disconnect node_1a
--connection node_2
--eval SET GLOBAL wsrep_auto_increment_control = $auto_increment_control_orig
--eval SET GLOBAL auto_increment_increment = $auto_increment_increment_node2
--eval SET GLOBAL auto_increment_offset = $auto_increment_offset_node2
+--disconnect node_2a
--enable_query_log
diff --git a/sql/handler.cc b/sql/handler.cc
index 2fad0dca954..0c8debc824f 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -913,7 +913,7 @@ static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
{
handlerton *hton= plugin_hton(plugin);
- mysql_mutex_assert_owner(&thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&thd->LOCK_thd_kill);
if (hton->kill_query && thd_get_ha_data(thd, hton))
hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
return FALSE;
diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc
index 19ec3d948c4..c4951f6d200 100644
--- a/sql/service_wsrep.cc
+++ b/sql/service_wsrep.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 Codership Oy <info@codership.com>
+/* Copyright 2018-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,12 +29,14 @@ extern "C" my_bool wsrep_on(const THD *thd)
extern "C" void wsrep_thd_LOCK(const THD *thd)
{
+ mysql_mutex_lock(&thd->LOCK_thd_kill);
mysql_mutex_lock(&thd->LOCK_thd_data);
}
extern "C" void wsrep_thd_UNLOCK(const THD *thd)
{
mysql_mutex_unlock(&thd->LOCK_thd_data);
+ mysql_mutex_unlock(&thd->LOCK_thd_kill);
}
extern "C" void wsrep_thd_kill_LOCK(const THD *thd)
@@ -188,6 +190,8 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
DBUG_ASSERT(wsrep_thd_is_SR(victim_thd));
if (!victim_thd || !wsrep_on(bf_thd)) return;
+ wsrep_thd_LOCK(victim_thd);
+
WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s",
victim_thd->thread_id,
victim_thd->wsrep_trx_id(),
@@ -208,6 +212,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
{
wsrep_thd_self_abort(victim_thd);
}
+
+ wsrep_thd_UNLOCK(victim_thd);
+
if (bf_thd)
{
wsrep_store_threadvars(bf_thd);
@@ -217,6 +224,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
my_bool signal)
{
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
+
DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
{
const char act[]=
@@ -233,28 +243,26 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
have wsrep on. Note that this should never interrupt RSU
as RSU has paused the provider.
*/
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
+
if ((ret || !wsrep_on(victim_thd)) && signal)
{
- mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
- mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_kill);
- mysql_mutex_lock(&victim_thd->LOCK_thd_data);
-
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
{
WSREP_DEBUG("victim is killed already by %llu, skipping awake",
victim_thd->wsrep_aborter);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
+ wsrep_thd_UNLOCK(victim_thd);
return false;
}
- mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
victim_thd->wsrep_aborter= bf_thd->thread_id;
victim_thd->awake_no_mutex(KILL_QUERY);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
- } else {
- WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
}
+ else
+ WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", thd_get_thread_id(victim_thd));
+
+ wsrep_thd_UNLOCK(victim_thd);
return ret;
}
@@ -279,8 +287,6 @@ extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right)
extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd)
{
- mysql_mutex_assert_owner(&thd->LOCK_thd_data);
-
const wsrep::client_state& cs(thd->wsrep_cs());
const enum wsrep::transaction::state tx_state(cs.transaction().state());
switch (tx_state)
@@ -294,8 +300,6 @@ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd)
default:
return false;
}
-
- return false;
}
static inline enum wsrep::key::type
diff --git a/sql/slave.cc b/sql/slave.cc
index c63eee605f5..62ceb07b224 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1072,8 +1072,8 @@ terminate_slave_thread(THD *thd,
int error __attribute__((unused));
DBUG_PRINT("loop", ("killing slave thread"));
- mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill);
+ mysql_mutex_lock(&thd->LOCK_thd_data);
#ifndef DONT_USE_THR_ALARM
/*
Error codes from pthread_kill are:
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index b8b09b2ef94..62bf01e32a1 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -826,6 +826,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
mysql_mutex_init(key_LOCK_wakeup_ready, &LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_thd_kill, &LOCK_thd_kill, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_COND_wakeup_ready, &COND_wakeup_ready, 0);
+ mysql_mutex_record_order(&LOCK_thd_kill, &LOCK_thd_data);
/* Variables with default values */
proc_info="login";
@@ -1883,7 +1884,6 @@ void THD::awake_no_mutex(killed_state state_to_set)
DBUG_PRINT("enter", ("this: %p current_thd: %p state: %d",
this, current_thd, (int) state_to_set));
THD_CHECK_SENTRY(this);
- mysql_mutex_assert_owner(&LOCK_thd_data);
mysql_mutex_assert_owner(&LOCK_thd_kill);
print_aborted_warning(3, "KILLED");
@@ -2048,6 +2048,8 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
if (needs_thr_lock_abort)
{
+ bool mutex_released= false;
+ mysql_mutex_lock(&in_use->LOCK_thd_kill);
mysql_mutex_lock(&in_use->LOCK_thd_data);
/* If not already dying */
if (in_use->killed != KILL_CONNECTION_HARD)
@@ -2064,12 +2066,25 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
thread can see those instances (e.g. see partitioning code).
*/
if (!thd_table->needs_reopen())
- {
signalled|= mysql_lock_abort_for_thread(this, thd_table);
- }
}
+#ifdef WITH_WSREP
+ if (WSREP(this) && wsrep_thd_is_BF(this, false))
+ {
+ WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s"
+ " victim %llu query %s",
+ this->real_id, wsrep_thd_query(this),
+ in_use->real_id, wsrep_thd_query(in_use));
+ wsrep_abort_thd(this, in_use, false);
+ mutex_released= true;
+ }
+#endif /* WITH_WSREP */
+ }
+ if (!mutex_released)
+ {
+ mysql_mutex_unlock(&in_use->LOCK_thd_data);
+ mysql_mutex_unlock(&in_use->LOCK_thd_kill);
}
- mysql_mutex_unlock(&in_use->LOCK_thd_data);
}
DBUG_RETURN(signalled);
}
@@ -5288,11 +5303,14 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
#ifdef WITH_WSREP
/* wsrep applier, replayer and TOI processing threads are ordered
by replication provider, relaxed GAP locking protocol can be used
- between high priority wsrep threads
+ between high priority wsrep threads.
+ Note that wsrep_thd_is_BF() doesn't take LOCK_thd_data for either thd,
+ the caller should guarantee that the BF state won't change.
+ (e.g. InnoDB does it by keeping lock_sys.mutex locked)
*/
if (WSREP_ON &&
wsrep_thd_is_BF(const_cast<THD *>(thd), false) &&
- wsrep_thd_is_BF(const_cast<THD *>(other_thd), true))
+ wsrep_thd_is_BF(const_cast<THD *>(other_thd), false))
return 0;
#endif /* WITH_WSREP */
rgi= thd->rgi_slave;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 81452cda035..7aac98eccb2 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3470,11 +3470,11 @@ public:
void awake_no_mutex(killed_state state_to_set);
void awake(killed_state state_to_set)
{
- mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_lock(&LOCK_thd_kill);
+ mysql_mutex_lock(&LOCK_thd_data);
awake_no_mutex(state_to_set);
- mysql_mutex_unlock(&LOCK_thd_kill);
mysql_mutex_unlock(&LOCK_thd_data);
+ mysql_mutex_unlock(&LOCK_thd_kill);
}
void abort_current_cond_wait(bool force);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index af30aa6fde9..84ab136bb07 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -9248,7 +9248,7 @@ THD *find_thread_by_id(longlong id, bool query_id)
return arg.thd;
}
- mysql_mutex_lock(&thd->LOCK_thd_data);
+
/**
kill one thread.
@@ -9292,7 +9292,8 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
faster and do a harder kill than KILL_SYSTEM_THREAD;
*/
- mysql_mutex_lock(&tmp->LOCK_thd_data); // for various wsrep* checks below
+ mysql_mutex_lock(&tmp->LOCK_thd_data); // Lock from concurrent usage
+
#ifdef WITH_WSREP
if (((thd->security_ctx->master_access & PRIV_KILL_OTHER_USER_PROCESS) ||
thd->security_ctx->user_matches(tmp->security_ctx)) &&
@@ -9307,23 +9308,23 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ
if (tmp->wsrep_aborter && tmp->wsrep_aborter != thd->thread_id)
{
/* victim is in hit list already, bail out */
- WSREP_DEBUG("victim has wsrep aborter: %lu, skipping awake()",
- tmp->wsrep_aborter);
+ WSREP_DEBUG("victim %llu has wsrep aborter: %lu, skipping awake()",
+ id, tmp->wsrep_aborter);
error= 0;
}
else
#endif /* WITH_WSREP */
{
- WSREP_DEBUG("kill_one_thread %llu, victim: %llu wsrep_aborter %llu by signal %d",
- thd->thread_id, id, tmp->wsrep_aborter, kill_signal);
+ WSREP_DEBUG("kill_one_thread victim: %llu wsrep_aborter %lu by signal %d",
+ id, tmp->wsrep_aborter, kill_signal);
tmp->awake_no_mutex(kill_signal);
- WSREP_DEBUG("victim: %llu taken care of", id);
error= 0;
}
}
else
error= (type == KILL_TYPE_QUERY ? ER_KILL_QUERY_DENIED_ERROR :
ER_KILL_DENIED_ERROR);
+
mysql_mutex_unlock(&tmp->LOCK_thd_data);
}
mysql_mutex_unlock(&tmp->LOCK_thd_kill);
@@ -9438,6 +9439,18 @@ static
void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
{
uint error;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_DEBUG("sql_kill called");
+ if (thd->wsrep_applier)
+ {
+ WSREP_DEBUG("KILL in applying, bailing out here");
+ return;
+ }
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+ }
+#endif /* WITH_WSREP */
if (likely(!(error= kill_one_thread(thd, id, state, type))))
{
if (!thd->killed)
@@ -9447,6 +9460,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
}
else
my_error(error, MYF(0), id);
+#ifdef WITH_WSREP
+ return;
+ wsrep_error_label:
+ my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd));
+#endif /* WITH_WSREP */
}
@@ -9455,6 +9473,18 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
{
uint error;
ha_rows rows;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_DEBUG("sql_kill_user called");
+ if (thd->wsrep_applier)
+ {
+ WSREP_DEBUG("KILL in applying, bailing out here");
+ return;
+ }
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+ }
+#endif /* WITH_WSREP */
if (likely(!(error= kill_threads_for_user(thd, user, state, &rows))))
my_ok(thd, rows);
else
@@ -9465,6 +9495,11 @@ sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
*/
my_error(error, MYF(0), user->host.str, user->user.str);
}
+#ifdef WITH_WSREP
+ return;
+ wsrep_error_label:
+ my_error(ER_CANNOT_USER, MYF(0), user->user.str);
+#endif /* WITH_WSREP */
}
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 83806e6dbea..e15980e9b3c 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -3498,8 +3498,8 @@ static my_bool kill_callback(THD *thd, kill_callback_arg *arg)
thd->variables.server_id == arg->slave_server_id)
{
arg->thd= thd;
- mysql_mutex_lock(&thd->LOCK_thd_data);
mysql_mutex_lock(&thd->LOCK_thd_kill); // Lock from delete
+ mysql_mutex_lock(&thd->LOCK_thd_data);
return 1;
}
return 0;
diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc
index c2f49dbdd6b..dc4b1d22818 100644
--- a/sql/wsrep_client_service.cc
+++ b/sql/wsrep_client_service.cc
@@ -68,20 +68,15 @@ bool Wsrep_client_service::interrupted(
wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const
{
DBUG_ASSERT(m_thd == current_thd);
- /* Underlying mutex in lock object points to LOCK_thd_data, which
- protects m_thd->wsrep_trx(), LOCK_thd_kill protects m_thd->killed.
- Locking order is:
- 1) LOCK_thd_data
- 2) LOCK_thd_kill */
+ /* Underlying mutex in lock object points to THD::LOCK_thd_data, which
+ protects m_thd->wsrep_trx() and protects us from thd delete. */
mysql_mutex_assert_owner(static_cast<mysql_mutex_t*>(lock.mutex()->native()));
- mysql_mutex_lock(&m_thd->LOCK_thd_kill);
bool ret= (m_thd->killed != NOT_KILLED);
if (ret)
{
WSREP_DEBUG("wsrep state is interrupted, THD::killed %d trx state %d",
m_thd->killed, m_thd->wsrep_trx().state());
}
- mysql_mutex_unlock(&m_thd->LOCK_thd_kill);
return ret;
}
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index 560db53d16e..9502309de36 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2134,6 +2134,11 @@ static int wsrep_TOI_event_buf(THD* thd, uchar** buf, size_t* buf_len)
case SQLCOM_DROP_TABLE:
err= wsrep_drop_table_query(thd, buf, buf_len);
break;
+ case SQLCOM_KILL:
+ WSREP_DEBUG("KILL as TOI: %s", thd->query());
+ err= wsrep_to_buf_helper(thd, thd->query(), thd->query_length(),
+ buf, buf_len);
+ break;
case SQLCOM_CREATE_ROLE:
if (sp_process_definer(thd))
{
@@ -2547,7 +2552,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
request_thd, granted_thd);
ticket->wsrep_report(wsrep_debug);
- mysql_mutex_lock(&granted_thd->LOCK_thd_data);
+ /* Here we will call wsrep_abort_transaction so we should hold
+ THD::LOCK_thd_data to protect victim from concurrent usage
+ and THD::LOCK_thd_kill to protect from disconnect or delete. */
+ wsrep_thd_LOCK(granted_thd);
+
if (wsrep_thd_is_toi(granted_thd) ||
wsrep_thd_is_applying(granted_thd))
{
@@ -2555,21 +2564,22 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
{
WSREP_DEBUG("BF thread waiting for SR in aborting state");
ticket->wsrep_report(wsrep_debug);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ wsrep_thd_UNLOCK(granted_thd);
}
else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd))
{
- WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR",
+ WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR",
schema, schema_len, request_thd, granted_thd);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd(request_thd, granted_thd, 1);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
}
else
{
WSREP_MDL_LOG(INFO, "MDL BF-BF conflict", schema, schema_len,
request_thd, granted_thd);
ticket->wsrep_report(true);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ wsrep_thd_UNLOCK(granted_thd);
unireg_abort(1);
}
}
@@ -2578,15 +2588,16 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
{
WSREP_DEBUG("BF thread waiting for FLUSH");
ticket->wsrep_report(wsrep_debug);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ wsrep_thd_UNLOCK(granted_thd);
}
else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE)
{
WSREP_DEBUG("DROP caused BF abort, conf %s",
wsrep_thd_transaction_state_str(granted_thd));
ticket->wsrep_report(wsrep_debug);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd(request_thd, granted_thd, 1);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
}
else
{
@@ -2595,8 +2606,9 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
ticket->wsrep_report(wsrep_debug);
if (granted_thd->wsrep_trx().active())
{
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
- wsrep_abort_thd(request_thd, granted_thd, 1);
+ wsrep_abort_thd(request_thd, granted_thd, true);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
}
else
{
@@ -2604,10 +2616,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
Granted_thd is likely executing with wsrep_on=0. If the requesting
thd is BF, BF abort and wait.
*/
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
if (wsrep_thd_is_BF(request_thd, FALSE))
{
ha_abort_transaction(request_thd, granted_thd, TRUE);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_data);
+ mysql_mutex_assert_not_owner(&granted_thd->LOCK_thd_kill);
}
else
{
@@ -2629,6 +2642,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
static bool abort_replicated(THD *thd)
{
bool ret_code= false;
+ wsrep_thd_LOCK(thd);
if (thd->wsrep_trx().state() == wsrep::transaction::s_committing)
{
WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id));
@@ -2636,6 +2650,9 @@ static bool abort_replicated(THD *thd)
(void)wsrep_abort_thd(thd, thd, TRUE);
ret_code= true;
}
+ else
+ wsrep_thd_UNLOCK(thd);
+
return ret_code;
}
@@ -2673,8 +2690,10 @@ static my_bool have_client_connections(THD *thd, void*)
(longlong) thd->thread_id));
if (is_client_connection(thd) && thd->killed == KILL_CONNECTION)
{
+ WSREP_DEBUG("Informing thread %lld that it's time to die",
+ thd->thread_id);
(void)abort_replicated(thd);
- return 1;
+ return true;
}
return 0;
}
@@ -2711,6 +2730,8 @@ static my_bool kill_all_threads(THD *thd, THD *caller_thd)
{
DBUG_PRINT("quit", ("Informing thread %lld that it's time to die",
(longlong) thd->thread_id));
+ WSREP_DEBUG("Informing thread %lld that it's time to die",
+ thd->thread_id);
/* We skip slave threads & scheduler on this first loop through. */
if (is_client_connection(thd) && thd != caller_thd)
{
diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc
index dc6bb21dbd9..08827cb612e 100644
--- a/sql/wsrep_thd.cc
+++ b/sql/wsrep_thd.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013 Codership Oy <info@codership.com>
+/* Copyright (C) 2013-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -314,7 +314,8 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
THD *victim_thd= (THD *) victim_thd_ptr;
THD *bf_thd= (THD *) bf_thd_ptr;
- mysql_mutex_lock(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
/* Note that when you use RSU node is desynced from cluster, thus WSREP(thd)
might not be true.
@@ -327,16 +328,14 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
{
WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ?
(long long)bf_thd->real_id : 0, (long long)victim_thd->real_id);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
ha_abort_transaction(bf_thd, victim_thd, signal);
- mysql_mutex_lock(&victim_thd->LOCK_thd_data);
}
else
{
WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
+ wsrep_thd_UNLOCK(victim_thd);
}
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
DBUG_RETURN(1);
}
@@ -345,6 +344,9 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
WSREP_LOG_THD(bf_thd, "BF aborter before");
WSREP_LOG_THD(victim_thd, "victim before");
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
+
DBUG_EXECUTE_IF("sync.wsrep_bf_abort",
{
const char act[]=
@@ -358,7 +360,7 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active())
{
WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction");
- switch (victim_thd->wsrep_trx().state())
+ switch (victim_thd->wsrep_trx().state())
{
case wsrep::transaction::s_aborting: /* fall through */
case wsrep::transaction::s_aborted:
@@ -367,7 +369,13 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
default:
break;
}
+ /* Test: galera_create_table_as_select. Here we enter wsrep-lib
+ where LOCK_thd_data will be acquired, thus we need to release it.
+ However, we can still hold LOCK_thd_kill to protect from
+ disconnect or delete. */
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id());
+ mysql_mutex_lock(&victim_thd->LOCK_thd_data);
}
bool ret;
@@ -375,11 +383,21 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
if (wsrep_thd_is_toi(bf_thd))
{
+ /* Here we enter wsrep-lib where LOCK_thd_data will be acquired,
+ thus we need to release it. However, we can still hold
+ LOCK_thd_kill to protect from disconnect or delete. */
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno);
+ mysql_mutex_lock(&victim_thd->LOCK_thd_data);
}
else
{
+ /* Test: mysql-wsrep-features#165. Here we enter wsrep-lib
+ where LOCK_thd_data will be acquired and later LOCK_thd_kill
+ thus we need to release them. */
+ wsrep_thd_UNLOCK(victim_thd);
ret= victim_thd->wsrep_cs().bf_abort(bf_seqno);
+ wsrep_thd_LOCK(victim_thd);
}
if (ret)
{
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 9026b5844dd..8f5c9ab56ed 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -4761,7 +4761,7 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
{
ut_ad(trx->mysql_thd == thd);
#ifdef WITH_WSREP
- if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
+ if (wsrep_thd_is_aborting(thd) || trx->lock.was_chosen_as_wsrep_victim)
/* if victim has been signaled by BF thread and/or aborting is already
progressing, following query aborting is not necessary any more.
Also, BF thread should own trx mutex for the victim. */
@@ -4771,6 +4771,8 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
if (lock_t *lock= trx->lock.wait_lock)
{
trx_mutex_enter(trx);
+ if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
+ trx->lock.was_chosen_as_deadlock_victim= TRUE;
lock_cancel_waiting_and_release(lock);
trx_mutex_exit(trx);
}
@@ -18639,6 +18641,40 @@ static struct st_mysql_storage_engine innobase_storage_engine=
#ifdef WITH_WSREP
+static
+void
+wsrep_kill_victim(
+ MYSQL_THD const bf_thd,
+ MYSQL_THD thd,
+ trx_t* victim_trx,
+ my_bool signal)
+{
+ DBUG_ENTER("wsrep_kill_victim");
+
+ /* Mark transaction as a victim for Galera abort */
+ victim_trx->lock.was_chosen_as_wsrep_victim= true;
+ if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
+ {
+ WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
+ wsrep_thd_UNLOCK(thd);
+ DBUG_VOID_RETURN;
+ }
+
+ if (wsrep_thd_bf_abort(bf_thd, thd, signal))
+ {
+ lock_t* wait_lock= victim_trx->lock.wait_lock;
+ if (wait_lock)
+ {
+ DBUG_ASSERT(victim_trx->is_wsrep());
+ WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd));
+ victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
+ lock_cancel_waiting_and_release(wait_lock);
+ }
+ }
+
+ DBUG_VOID_RETURN;
+}
+
/** This function is used to kill one transaction.
This transaction was open on this node (not-yet-committed), and a
@@ -18662,87 +18698,65 @@ comparison as in the local certification failure.
@param[in] bf_thd Brute force (BF) thread
@param[in,out] victim_trx Vimtim trx to be killed
@param[in] signal Should victim be signaled */
-UNIV_INTERN
void
wsrep_innobase_kill_one_trx(
- THD* bf_thd,
+ MYSQL_THD const bf_thd,
trx_t *victim_trx,
- bool signal)
+ my_bool signal)
{
- ut_ad(bf_thd);
- ut_ad(victim_trx);
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(victim_trx));
-
- DBUG_ENTER("wsrep_innobase_kill_one_trx");
-
- THD *thd= (THD *) victim_trx->mysql_thd;
- ut_ad(thd);
- /* Note that bf_trx might not exist here e.g. on MDL conflict
- case (test: galera_concurrent_ctas). Similarly, BF thread
- could be also acquiring MDL-lock causing victim to be
- aborted. However, we have not yet called innobase_trx_init()
- for BF transaction (test: galera_many_columns)*/
- trx_t* bf_trx= thd_to_trx(bf_thd);
- DBUG_ASSERT(wsrep_on(bf_thd));
-
- wsrep_thd_LOCK(thd);
-
- WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
-
- WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld "
- "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
- "query: %s",
- wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
- bf_trx ? bf_trx->id : TRX_ID_MAX,
- thd_get_thread_id(bf_thd),
- wsrep_thd_trx_seqno(bf_thd),
- wsrep_thd_client_state_str(bf_thd),
- wsrep_thd_client_mode_str(bf_thd),
- wsrep_thd_transaction_state_str(bf_thd),
- wsrep_thd_query(bf_thd));
-
- WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld "
- "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
- "query: %s",
- wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
- victim_trx->id,
- thd_get_thread_id(thd),
- wsrep_thd_trx_seqno(thd),
- wsrep_thd_client_state_str(thd),
- wsrep_thd_client_mode_str(thd),
- wsrep_thd_transaction_state_str(thd),
- wsrep_thd_query(thd));
-
- /* Mark transaction as a victim for Galera abort */
- victim_trx->lock.was_chosen_as_wsrep_victim= true;
- if (wsrep_thd_set_wsrep_aborter(bf_thd, thd))
- {
- WSREP_DEBUG("innodb kill transaction skipped due to wsrep_aborter set");
- wsrep_thd_UNLOCK(thd);
- DBUG_VOID_RETURN;
- }
+ ut_ad(bf_thd);
+ ut_ad(victim_trx);
+ ut_ad(lock_mutex_own());
+ ut_ad(trx_mutex_own(victim_trx));
- /* Note that we need to release this as it will be acquired
- below in wsrep-lib */
- wsrep_thd_UNLOCK(thd);
- DEBUG_SYNC(bf_thd, "before_wsrep_thd_abort");
+ DBUG_ENTER("wsrep_innobase_kill_one_trx");
+ THD *thd= (THD *) victim_trx->mysql_thd;
+ /* Note that bf_trx might not exist here e.g. on MDL conflict
+ case (test: galera_concurrent_ctas).*/
+ trx_t* bf_trx= (trx_t*)thd_to_trx(bf_thd);
- if (wsrep_thd_bf_abort(bf_thd, thd, signal))
- {
- lock_t* wait_lock = victim_trx->lock.wait_lock;
- if (wait_lock) {
- DBUG_ASSERT(victim_trx->is_wsrep());
- WSREP_DEBUG("victim has wait flag: %lu",
- thd_get_thread_id(thd));
-
- WSREP_DEBUG("canceling wait lock");
- victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
- lock_cancel_waiting_and_release(wait_lock);
- }
- }
+ if (!thd)
+ {
+ WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
+ DBUG_VOID_RETURN;
+ }
- DBUG_VOID_RETURN;
+ /* Here we need to lock THD::LOCK_thd_data to protect from
+ concurrent usage or disconnect or delete. */
+ DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
+ wsrep_thd_LOCK(thd);
+ DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");
+
+ WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
+
+ WSREP_DEBUG("wsrep_innobase_kill_one_trx: Aborter %s "
+ "trx_id: " TRX_ID_FMT " thread: %ld "
+ "seqno: %lld client_state: %s client_mode: %s "
+ "trx_state %s query: %s",
+ wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
+ bf_trx ? bf_trx->id : TRX_ID_MAX,
+ thd_get_thread_id(bf_thd),
+ wsrep_thd_trx_seqno(bf_thd),
+ wsrep_thd_client_state_str(bf_thd),
+ wsrep_thd_client_mode_str(bf_thd),
+ wsrep_thd_transaction_state_str(bf_thd),
+ wsrep_thd_query(bf_thd));
+
+ WSREP_DEBUG("wsrep_innobase_kill_one_trx: Victim %s "
+ "trx_id: " TRX_ID_FMT " thread: %ld "
+ "seqno: %lld client_state: %s client_mode: %s "
+ "trx_state %s query: %s",
+ wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
+ victim_trx->id,
+ thd_get_thread_id(thd),
+ wsrep_thd_trx_seqno(thd),
+ wsrep_thd_client_state_str(thd),
+ wsrep_thd_client_mode_str(thd),
+ wsrep_thd_transaction_state_str(thd),
+ wsrep_thd_query(thd));
+
+ wsrep_kill_victim(bf_thd, thd, victim_trx, signal);
+ DBUG_VOID_RETURN;
}
/** This function forces the victim transaction to abort. Aborting the
@@ -18762,29 +18776,42 @@ wsrep_abort_transaction(
THD *victim_thd,
my_bool signal)
{
- DBUG_ENTER("wsrep_abort_transaction");
- ut_ad(bf_thd);
- ut_ad(victim_thd);
-
- trx_t* victim_trx = thd_to_trx(victim_thd);
-
- WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %s",
- wsrep_thd_query(bf_thd),
- wsrep_thd_query(victim_thd),
- wsrep_thd_transaction_state_str(victim_thd));
-
- if (victim_trx) {
- lock_mutex_enter();
- trx_mutex_enter(victim_trx);
- wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal);
- trx_mutex_exit(victim_trx);
- lock_mutex_exit();
- DBUG_VOID_RETURN;
- } else {
- wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
- }
-
- DBUG_VOID_RETURN;
+ /* Note that victim thd is protected with
+ THD::LOCK_thd_data and THD::LOCK_thd_kill here. */
+ trx_t* victim_trx= thd_to_trx(victim_thd);
+ trx_t* bf_trx= thd_to_trx(bf_thd);
+ WSREP_DEBUG("wsrep_abort_transaction: BF:"
+ " thread %ld client_state %s client_mode %s"
+ " trans_state %s query %s trx " TRX_ID_FMT,
+ thd_get_thread_id(bf_thd),
+ wsrep_thd_client_state_str(bf_thd),
+ wsrep_thd_client_mode_str(bf_thd),
+ wsrep_thd_transaction_state_str(bf_thd),
+ wsrep_thd_query(bf_thd),
+ bf_trx ? bf_trx->id : 0);
+
+ WSREP_DEBUG("wsrep_abort_transaction: victim:"
+ " thread %ld client_state %s client_mode %s"
+ " trans_state %s query %s trx " TRX_ID_FMT,
+ thd_get_thread_id(victim_thd),
+ wsrep_thd_client_state_str(victim_thd),
+ wsrep_thd_client_mode_str(victim_thd),
+ wsrep_thd_transaction_state_str(victim_thd),
+ wsrep_thd_query(victim_thd),
+ victim_trx ? victim_trx->id : 0);
+
+ if (victim_trx)
+ {
+ lock_mutex_enter();
+ trx_mutex_enter(victim_trx);
+ wsrep_kill_victim(bf_thd, victim_thd, victim_trx, signal);
+ lock_mutex_exit();
+ trx_mutex_exit(victim_trx);
+ }
+ else
+ {
+ wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
+ }
}
static
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 453f9e028e9..c1c41a8f77c 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -208,7 +208,11 @@ innobase_casedn_str(
char* a); /*!< in/out: string to put in lower case */
#ifdef WITH_WSREP
-void wsrep_innobase_kill_one_trx(THD *bf_thd, trx_t *victim_trx, bool signal);
+void
+wsrep_innobase_kill_one_trx(
+ THD* bf_thd,
+ trx_t *victim_trx,
+ my_bool signal);
ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
unsigned char* str, ulint str_length,
ulint buf_length);
diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc
index e5f71e0b151..8182d1230ed 100644
--- a/storage/innobase/lock/lock0wait.cc
+++ b/storage/innobase/lock/lock0wait.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2020, MariaDB Corporation.
+Copyright (c) 2014, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -185,13 +185,11 @@ lock_wait_table_reserve_slot(
check if lock timeout was for priority thread,
as a side effect trigger lock monitor
@param[in] trx transaction owning the lock
-@param[in] locked true if trx and lock_sys.mutex is ownd
@return false for regular lock timeout */
static
bool
wsrep_is_BF_lock_timeout(
- const trx_t* trx,
- bool locked = true)
+ const trx_t* trx)
{
bool long_wait= (trx->error_state != DB_DEADLOCK &&
srv_monitor_timer && trx->is_wsrep() &&
@@ -205,21 +203,10 @@ wsrep_is_BF_lock_timeout(
ib::info() << "WSREP: BF lock wait long for trx:" << trx->id
<< " query: " << wsrep_thd_query(trx->mysql_thd);
- if (!locked)
- lock_mutex_enter();
-
- ut_ad(lock_mutex_own());
-
- trx_print_latched(stderr, trx, 3000);
- /* Note this will release lock_sys mutex */
- lock_print_info_all_transactions(stderr);
-
- if (locked)
- lock_mutex_enter();
-
return was_wait;
- } else
+ } else {
return false;
+ }
}
#endif /* WITH_WSREP */
@@ -388,7 +375,7 @@ lock_wait_suspend_thread(
&& wait_time > (double) lock_wait_timeout
#ifdef WITH_WSREP
&& (!trx->is_wsrep()
- || (!wsrep_is_BF_lock_timeout(trx, false)
+ || (!wsrep_is_BF_lock_timeout(trx)
&& trx->error_state != DB_DEADLOCK))
#endif /* WITH_WSREP */
) {