summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/suite/galera/r/galera_UK_conflict.result13
-rw-r--r--mysql-test/suite/galera/r/galera_bf_kill_debug.result209
-rw-r--r--mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result16
-rw-r--r--mysql-test/suite/galera/t/galera_UK_conflict.test3
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill_debug.cnf6
-rw-r--r--mysql-test/suite/galera/t/galera_bf_kill_debug.test321
-rw-r--r--mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test14
-rw-r--r--sql/handler.cc1
-rw-r--r--sql/mysqld.cc9
-rw-r--r--sql/mysqld.h2
-rw-r--r--sql/service_wsrep.cc33
-rw-r--r--sql/sql_class.cc26
-rw-r--r--sql/sql_class.h11
-rw-r--r--sql/sql_parse.cc34
-rw-r--r--sql/wsrep_mysqld.cc109
-rw-r--r--sql/wsrep_thd.cc36
-rw-r--r--storage/innobase/handler/ha_innodb.cc133
-rw-r--r--storage/innobase/include/ha_prototypes.h8
-rw-r--r--storage/innobase/lock/lock0wait.cc21
-rw-r--r--storage/innobase/srv/srv0conc.cc12
20 files changed, 691 insertions, 326 deletions
diff --git a/mysql-test/suite/galera/r/galera_UK_conflict.result b/mysql-test/suite/galera/r/galera_UK_conflict.result
index 44bb64c9d63..cc7e17d7c58 100644
--- a/mysql-test/suite/galera/r/galera_UK_conflict.result
+++ b/mysql-test/suite/galera/r/galera_UK_conflict.result
@@ -68,9 +68,9 @@ f1 f2 f3
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
-DROP TABLE t1;
-test scenario 2
-connection node_1;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0);
@@ -92,9 +92,9 @@ SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1;
-COMMIT;
-connection node_1a;
-SET SESSION wsrep_on = 0;
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+7
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb";
@@ -125,6 +125,7 @@ f1 f2 f3
3 3 1
4 4 2
5 5 2
+8 8 8
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
diff --git a/mysql-test/suite/galera/r/galera_bf_kill_debug.result b/mysql-test/suite/galera/r/galera_bf_kill_debug.result
index c3eae243f47..2c7227c25c7 100644
--- a/mysql-test/suite/galera/r/galera_bf_kill_debug.result
+++ b/mysql-test/suite/galera/r/galera_bf_kill_debug.result
@@ -1,54 +1,165 @@
connection node_2;
connection node_1;
-connection node_2;
-CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
-insert into t1 values (NULL,1);
-connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2a;
-truncate t1;
-insert into t1 values (1,0);
+#
+# Case 1: We execute bf kill to wsrep_innobase_kill_one_trx
+# function just before wsrep_thd_LOCK(thd) call. Then we
+# try to kill victim transaction by KILL QUERY
+#
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
begin;
-update t1 set b=2 where a=1;
-connection node_2;
-set session wsrep_sync_wait=0;
-connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2b;
-SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
-connection node_1;
-select * from t1;
-a b
-1 0
-update t1 set b= 1 where a=1;
-connection node_2b;
-SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
-connection node_2;
-SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
-connection node_2b;
-SET DEBUG_SYNC='now WAIT_FOR awake_reached';
-SET GLOBAL debug_dbug = "";
-SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
-SET DEBUG_SYNC = "now SIGNAL continue_kill";
-connection node_2;
-connection node_2a;
-select * from t1;
-connection node_2;
-SET DEBUG_SYNC = "RESET";
-drop table t1;
-disconnect node_2a;
-connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
-connection node_2a;
-CREATE TABLE t1 (i int primary key);
-SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
-INSERT INTO t1 VALUES (1);
-connection node_2;
-SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
-SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
-SET DEBUG_SYNC='RESET';
-connection node_2a;
+update t1 set b = b * 10 where id between 2 and 4;
+connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+SET DEBUG_SYNC='wsrep_before_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue';
+ALTER TABLE t1 ADD UNIQUE KEY b1(b);;
+connection node_1;
+SET DEBUG_SYNC='now WAIT_FOR bf_kill';
+connection node_1b;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `id` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ UNIQUE KEY `b1` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+id b
+1 1
+2 2
+3 3
+4 4
+5 5
+connection node_1;
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
+disconnect node_1a;
+disconnect node_1b;
+disconnect node_1c;
+#
+# Case 2: We execute bf kill to wsrep_innobase_kill_one_trx
+# function just after wsrep_thd_LOCK(thd) call. Then we
+# try to kill victim transaction by KILL QUERY
+#
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+begin;
+update t1 set b = b * 10 where id between 2 and 4;
+connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+SET DEBUG_SYNC='wsrep_after_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue';
+ALTER TABLE t1 ADD UNIQUE KEY b1(b);;
+connection node_1;
+SET DEBUG_SYNC='now WAIT_FOR bf_kill';
+connection node_1b;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `id` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ UNIQUE KEY `b1` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+id b
+1 1
+2 2
+3 3
+4 4
+5 5
+connection node_1;
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
+disconnect node_1a;
+disconnect node_1b;
+disconnect node_1c;
+#
+# Case 3: Create victim transaction and try to send user KILL
+# from several threads
+#
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+begin;
+update t1 set b = b * 10 where id between 2 and 4;
+connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connect node_1d, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connection node_1b;
+connection node_1c;
+connection node_1d;
+connection node_1;
+disconnect node_1a;
+disconnect node_1b;
+disconnect node_1c;
+disconnect node_1d;
+DROP TABLE t1;
+#
+# Case 4: MDL-conflict, we execute ALTER until we hit gap in
+# wsrep_abort_transaction, while we are there we try to
+# manually KILL conflicting transaction (UPDATE) and
+# send conflicting transaction from other node to be executed
+# in this node by applier. As ALTER and KILL are TOI they
+# are not executed concurrently. Similarly UPDATE from other
+# node will wait for certification.
+#
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+begin;
+update t1 set b = b * 10 where id between 2 and 4;
+connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
+SET DEBUG_SYNC='wsrep_abort_victim_unlocked SIGNAL bf_kill_unlocked WAIT_FOR bf_continue';
+ALTER TABLE t1 ADD UNIQUE KEY b1(b);;
+connection node_1;
+SET DEBUG_SYNC='now WAIT_FOR bf_kill_unlocked';
+connection node_1b;
connection node_2;
-select * from t1;
-i
-1
-disconnect node_2a;
+update t1 set b = b + 1000 where id between 2 and 4;;
connection node_1;
-drop table t1;
+SET DEBUG_SYNC='now SIGNAL bf_continue';
+connection node_1c;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `id` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ UNIQUE KEY `b1` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+SELECT * FROM t1;
+id b
+1 1
+5 5
+2 1002
+3 1003
+4 1004
+connection node_1b;
+connection node_1;
+SET DEBUG_SYNC= 'RESET';
+SELECT * FROM t1;
+id b
+1 1
+5 5
+2 1002
+3 1003
+4 1004
+connection node_2;
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `id` int(11) NOT NULL,
+ `b` int(11) DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ UNIQUE KEY `b1` (`b`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+SELECT * FROM t1;
+id b
+1 1
+5 5
+2 1002
+3 1003
+4 1004
+DROP TABLE t1;
+disconnect node_1a;
+disconnect node_1c;
diff --git a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
index 6e55c59ad15..2493075b635 100644
--- a/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
+++ b/mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
@@ -23,22 +23,6 @@ connection node_1a;
connection node_1b;
connection node_2;
connection node_2a;
-connection node_1;
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-COUNT(*)
-20001
-SELECT COUNT(*) FROM child;
-COUNT(*)
-10000
-connection node_2;
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-COUNT(*)
-20001
-SELECT COUNT(*) FROM child;
-COUNT(*)
-10000
DROP TABLE child;
DROP TABLE parent;
DROP TABLE ten;
diff --git a/mysql-test/suite/galera/t/galera_UK_conflict.test b/mysql-test/suite/galera/t/galera_UK_conflict.test
index 9978ba9b8bf..7e5b5541214 100644
--- a/mysql-test/suite/galera/t/galera_UK_conflict.test
+++ b/mysql-test/suite/galera/t/galera_UK_conflict.test
@@ -140,7 +140,8 @@ SELECT * FROM t1;
# original state in node 1
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
-
+SELECT COUNT(*) FROM t1;
+SELECT * FROM t1;
DROP TABLE t1;
##################################################################################
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf b/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
index e68f891792c..77bb6af9f35 100644
--- a/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
+++ b/mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
@@ -1,7 +1,9 @@
!include ../galera_2nodes.cnf
[mysqld.1]
-wsrep-debug=SERVER
+wsrep_log_conflicts=ON
+wsrep_debug=1
[mysqld.2]
-wsrep-debug=SERVER
+wsrep_log_conflicts=ON
+wsrep_debug=1
diff --git a/mysql-test/suite/galera/t/galera_bf_kill_debug.test b/mysql-test/suite/galera/t/galera_bf_kill_debug.test
index c322f283757..f83d4a28ce9 100644
--- a/mysql-test/suite/galera/t/galera_bf_kill_debug.test
+++ b/mysql-test/suite/galera/t/galera_bf_kill_debug.test
@@ -1,140 +1,283 @@
--source include/galera_cluster.inc
---source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
+--echo #
+--echo # Case 1: We execute bf kill to wsrep_innobase_kill_one_trx
+--echo # function just before wsrep_thd_LOCK(thd) call. Then we
+--echo # try to kill victim transaction by KILL QUERY
+--echo #
+
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+
#
-# Test case 7:
-# 1. Start a transaction on node_2,
-# and leave it pending while holding a row locked
-# 2. set sync point pause applier
-# 3. send a conflicting write on node_1, it will pause
-# at the sync point
-# 4. though another connection to node_2, kill the local
-# transaction
+# This will be the victim transaction for both the BF kill and
+# the user KILL
#
+--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
+begin;
+update t1 set b = b * 10 where id between 2 and 4;
---connection node_2
-CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
-insert into t1 values (NULL,1);
+#
+# Take thread id for above query
+#
+--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
#
-# connection node_2a runs a local transaction, that is victim of BF abort
-# and victim of KILL command by connection node_2
+# Set DEBUG_SYNC and send conflicting DDL that will be TOI (bf) and
+# cause bf_kill
#
---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2a
-truncate t1;
-insert into t1 values (1,0);
+--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
+SET DEBUG_SYNC='wsrep_before_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue';
+--send ALTER TABLE t1 ADD UNIQUE KEY b1(b);
-# start a transaction that will conflict with later applier
-begin;
-update t1 set b=2 where a=1;
+#
+# Wait until we have reached the sync point
+#
+--connection node_1
+SET DEBUG_SYNC='now WAIT_FOR bf_kill';
---connection node_2
-set session wsrep_sync_wait=0;
---let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
---source include/wait_condition.inc
+#
+# Try to kill update query
+#
+--connection node_1b
+--disable_query_log
+--send_eval KILL QUERY $k_thread;
+
+
+#
+# Let bf_kill continue
+#
+--connection node_1
+SET DEBUG_SYNC='now SIGNAL bf_continue';
+--connection node_1c
+--reap
+SHOW CREATE TABLE t1;
+SELECT * FROM t1;
+
+--connection node_1b
+--reap
+--enable_query_log
+
+--connection node_1
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
+
+--disconnect node_1a
+--disconnect node_1b
+--disconnect node_1c
+
+--echo #
+--echo # Case 2: We execute bf kill to wsrep_innobase_kill_one_trx
+--echo # function just after wsrep_thd_LOCK(thd) call. Then we
+--echo # try to kill victim transaction by KILL QUERY
+--echo #
+
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+#
+# This will be the victim transaction for both the BF kill and
+# the user KILL
+#
+--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
+begin;
+update t1 set b = b * 10 where id between 2 and 4;
+
+#
+# Take thread id for above query
+#
+--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
-# connection node_2b is for controlling debug syn points
-# first set a sync point for applier, to pause during BF aborting
-# and before THD::awake would be called
#
---connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2b
-SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
+# Set DEBUG_SYNC and send conflicting DDL that will be TOI (bf) and
+# cause bf_kill
+#
+--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
+SET DEBUG_SYNC='wsrep_after_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue';
+--send ALTER TABLE t1 ADD UNIQUE KEY b1(b);
#
-# replicate an update, which will BF abort the victim node_2a
-# however, while applier in node 2 is handling the abort,
-# it will pause in sync point set by node_2b
+# Wait until we have reached the sync point
#
--connection node_1
-select * from t1;
-update t1 set b= 1 where a=1;
+SET DEBUG_SYNC='now WAIT_FOR bf_kill';
#
-# wait until the applying of above update has reached the sync point
-# in node 2
+# Try to kill update query
#
---connection node_2b
-SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
+--connection node_1b
+--disable_query_log
+--send_eval KILL QUERY $k_thread;
---connection node_2
#
-# pause KILL execution before awake
+# Let bf_kill continue
#
-SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
---disable_query_log
---send_eval KILL $k_thread
+--connection node_1
+SET DEBUG_SYNC='now SIGNAL bf_continue';
+--connection node_1c
+--reap
+SHOW CREATE TABLE t1;
+SELECT * FROM t1;
+
+--connection node_1b
+--reap
--enable_query_log
+--connection node_1
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
---connection node_2b
-SET DEBUG_SYNC='now WAIT_FOR awake_reached';
+--disconnect node_1a
+--disconnect node_1b
+--disconnect node_1c
-# release applier and KILL operator
-SET GLOBAL debug_dbug = "";
-SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
-SET DEBUG_SYNC = "now SIGNAL continue_kill";
+--echo #
+--echo # Case 3: Create victim transaction and try to send user KILL
+--echo # from several threads
+--echo #
---connection node_2
---reap
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
---connection node_2a
---error 0,1213,2013
-select * from t1;
+#
+# This will be victim transaction for user KILL
+#
+--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
+begin;
+update t1 set b = b * 10 where id between 2 and 4;
---connection node_2
-SET DEBUG_SYNC = "RESET";
+#
+# Take thread id for above query
+#
+--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--connect node_1d, 127.0.0.1, root, , test, $NODE_MYPORT_1
-drop table t1;
+--connection node_1b
+--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
---disconnect node_2a
#
-# Test case 7:
-# run a transaction in node 2, and set a sync point to pause the transaction
-# in commit phase.
-# Through another connection to node 2, kill the committing transaction by
-# KILL QUERY command
+# Try to kill update query from several connections concurrently
#
+--disable_query_log
+--send_eval KILL QUERY $k_thread;
---connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
---connection node_2a
---let $connection_id = `SELECT CONNECTION_ID()`
+--connection node_1c
+--disable_query_log
+--send_eval KILL QUERY $k_thread;
-CREATE TABLE t1 (i int primary key);
+--connection node_1d
+--disable_query_log
+--send_eval KILL QUERY $k_thread;
-# Set up sync point
-SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
+#
+# We do not know the execution order, so any of these could fail because
+# the KILL has already been done
+#
+--connection node_1b
+--enable_query_log
+--error 0,ER_KILL_DENIED_ERROR
+--reap
+--connection node_1c
+--enable_query_log
+--error 0,ER_KILL_DENIED_ERROR
+--reap
+--connection node_1d
+--enable_query_log
+--error 0,ER_KILL_DENIED_ERROR
+--reap
-# Send insert which will block in the sync point above
---send INSERT INTO t1 VALUES (1)
+--connection node_1
+--disconnect node_1a
+--disconnect node_1b
+--disconnect node_1c
+--disconnect node_1d
+DROP TABLE t1;
---connection node_2
-SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
+--echo #
+--echo # Case 4: MDL-conflict, we execute ALTER until we hit gap in
+--echo # wsrep_abort_transaction, while we are there we try to
+--echo # manually KILL conflicting transaction (UPDATE) and
+--echo # send conflicting transaction from other node to be executed
+--echo # in this node by applier. As ALTER and KILL are TOI they
+--echo # are not executed concurrently. Similarly UPDATE from other
+--echo # node will wait for certification.
+--echo #
+
+CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
+INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+
+#
+# This will be the victim transaction for both the BF kill and
+# the user KILL, and should not have any effect on the result
+#
+--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
+begin;
+update t1 set b = b * 10 where id between 2 and 4;
+
+#
+# Take thread id for above query
+#
+--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
+--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
+#
+# Set DEBUG_SYNC and send conflicting DDL that will be TOI (bf) and
+# cause bf_kill, but let it execute only up to the gap in wsrep_abort_transaction
+#
+--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
+SET DEBUG_SYNC='wsrep_abort_victim_unlocked SIGNAL bf_kill_unlocked WAIT_FOR bf_continue';
+--send ALTER TABLE t1 ADD UNIQUE KEY b1(b);
+
+#
+# Wait until we have reached the sync point
+#
+--connection node_1
+SET DEBUG_SYNC='now WAIT_FOR bf_kill_unlocked';
+
+#
+# Try to kill update query
+#
+--connection node_1b
--disable_query_log
---disable_result_log
-# victim has passed the point of no return, kill is not possible anymore
---eval KILL QUERY $connection_id
---enable_result_log
+--send_eval KILL QUERY $k_thread;
+
+#
+# Send conflicting update from other node, this should be applied on both nodes
+# but should not kill ALTER
+#
--enable_query_log
+--connection node_2
+--send update t1 set b = b + 1000 where id between 2 and 4;
-SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
-SET DEBUG_SYNC='RESET';
---connection node_2a
---error 0,1213
+#
+# Let bf_kill continue
+#
+--connection node_1
+SET DEBUG_SYNC='now SIGNAL bf_continue';
+--connection node_1c
--reap
+SHOW CREATE TABLE t1;
+SELECT * FROM t1;
---connection node_2
-# victim was able to complete the INSERT
-select * from t1;
-
---disconnect node_2a
+--connection node_1b
+--reap
+--enable_query_log
--connection node_1
-drop table t1;
+SET DEBUG_SYNC= 'RESET';
+SELECT * FROM t1;
+
+--connection node_2
+--reap
+SHOW CREATE TABLE t1;
+SELECT * FROM t1;
+DROP TABLE t1;
+
+--disconnect node_1a
+--disconnect node_1c
diff --git a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
index fadc94d78ff..3b4b427f551 100644
--- a/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
+++ b/mysql-test/suite/galera/t/galera_toi_ddl_fk_insert.test
@@ -54,15 +54,11 @@ INSERT INTO parent VALUES (1, 0);
--connection node_2a
--reap
---connection node_1
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-SELECT COUNT(*) FROM child;
-
---connection node_2
-SET SESSION wsrep_sync_wait=15;
-SELECT COUNT(*) FROM parent;
-SELECT COUNT(*) FROM child;
+#
+# ALTER TABLE could BF kill one or more of the INSERTs to parent, so
+# the actual number of rows in parent depends on whether
+# each INSERT is committed before ALTER TABLE is executed
+#
DROP TABLE child;
DROP TABLE parent;
diff --git a/sql/handler.cc b/sql/handler.cc
index 757fa95a9a3..8dd3868e6fb 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -859,7 +859,6 @@ static my_bool kill_handlerton(THD *thd, plugin_ref plugin,
{
handlerton *hton= plugin_hton(plugin);
- mysql_mutex_assert_owner(&thd->LOCK_thd_data);
if (hton->state == SHOW_OPTION_YES && hton->kill_query &&
thd_get_ha_data(thd, hton))
hton->kill_query(hton, thd, *(enum thd_kill_levels *) level);
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index ba1d477882f..c225e35153a 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -2122,7 +2122,7 @@ static void clean_up_mutexes()
****************************************************************************/
#ifdef EMBEDDED_LIBRARY
-void close_connection(THD *thd, uint sql_errno)
+void close_connection(THD *thd, uint sql_errno, my_bool locked)
{
}
#else
@@ -2526,7 +2526,7 @@ static void network_init(void)
For the connection that is doing shutdown, this is called twice
*/
-void close_connection(THD *thd, uint sql_errno)
+void close_connection(THD *thd, uint sql_errno, my_bool locked)
{
int lvl= (thd->main_security_ctx.user ? 3 : 1);
DBUG_ENTER("close_connection");
@@ -2542,7 +2542,10 @@ void close_connection(THD *thd, uint sql_errno)
"This connection closed normally without"
" authentication"));
- thd->disconnect();
+ if (locked)
+ thd->disconnect_mutexed();
+ else
+ thd->disconnect();
MYSQL_CONNECTION_DONE((int) sql_errno, thd->thread_id);
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 8e33d6488e4..e090042f442 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -75,7 +75,7 @@ enum enum_slave_parallel_mode {
/* Function prototypes */
void kill_mysql(THD *thd);
-void close_connection(THD *thd, uint sql_errno= 0);
+void close_connection(THD *thd, uint sql_errno= 0, my_bool locked=false);
void handle_connection_in_main_thread(CONNECT *thd);
void create_thread_to_handle_connection(CONNECT *connect);
void unlink_thd(THD *thd);
diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc
index 14f136ca480..2ef1ea26e61 100644
--- a/sql/service_wsrep.cc
+++ b/sql/service_wsrep.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 Codership Oy <info@codership.com>
+/* Copyright 2018-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -29,12 +29,14 @@ extern "C" my_bool wsrep_on(const THD *thd)
extern "C" void wsrep_thd_LOCK(const THD *thd)
{
+ mysql_mutex_lock(&thd->LOCK_thd_kill);
mysql_mutex_lock(&thd->LOCK_thd_data);
}
extern "C" void wsrep_thd_UNLOCK(const THD *thd)
{
mysql_mutex_unlock(&thd->LOCK_thd_data);
+ mysql_mutex_unlock(&thd->LOCK_thd_kill);
}
extern "C" void wsrep_thd_kill_LOCK(const THD *thd)
@@ -189,6 +191,9 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
DBUG_ASSERT(wsrep_thd_is_SR(victim_thd));
if (!victim_thd || !wsrep_on(bf_thd)) return;
+ mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
+ mysql_mutex_lock(&victim_thd->LOCK_thd_data);
+
WSREP_DEBUG("handle rollback, for deadlock: thd %llu trx_id %" PRIu64 " frags %zu conf %s",
victim_thd->thread_id,
victim_thd->wsrep_trx_id(),
@@ -209,6 +214,10 @@ extern "C" void wsrep_handle_SR_rollback(THD *bf_thd,
{
wsrep_thd_self_abort(victim_thd);
}
+
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
+
if (bf_thd)
{
wsrep_store_threadvars(bf_thd);
@@ -219,7 +228,7 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
my_bool signal)
{
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
- mysql_mutex_assert_not_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
my_bool ret= wsrep_bf_abort(bf_thd, victim_thd);
/*
Send awake signal if victim was BF aborted or does not
@@ -228,22 +237,19 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
*/
if ((ret || !wsrep_on(victim_thd)) && signal)
{
- mysql_mutex_lock(&victim_thd->LOCK_thd_data);
-
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
{
WSREP_DEBUG("victim is killed already by %llu, skipping awake",
victim_thd->wsrep_aborter);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
return false;
}
victim_thd->wsrep_aborter= bf_thd->thread_id;
victim_thd->awake_no_mutex(KILL_QUERY);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
- } else {
- WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
}
+ else
+ WSREP_DEBUG("wsrep_thd_bf_abort skipped awake");
+
return ret;
}
@@ -268,13 +274,12 @@ extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right)
extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd)
{
+ DBUG_ASSERT(thd);
mysql_mutex_assert_owner(&thd->LOCK_thd_data);
- if (thd != 0)
+ const wsrep::client_state& cs(thd->wsrep_cs());
+ const enum wsrep::transaction::state tx_state(cs.transaction().state());
+ switch (tx_state)
{
- const wsrep::client_state& cs(thd->wsrep_cs());
- const enum wsrep::transaction::state tx_state(cs.transaction().state());
- switch (tx_state)
- {
case wsrep::transaction::s_must_abort:
return (cs.state() == wsrep::client_state::s_exec ||
cs.state() == wsrep::client_state::s_result);
@@ -283,9 +288,7 @@ extern "C" my_bool wsrep_thd_is_aborting(const MYSQL_THD thd)
return true;
default:
return false;
- }
}
- return false;
}
static inline enum wsrep::key::type
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 28bf77c94e8..c2dcc07997e 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1977,13 +1977,13 @@ void THD::abort_current_cond_wait(bool force)
the Vio might be disassociated concurrently.
*/
-void THD::disconnect()
+void THD::disconnect_mutexed()
{
Vio *vio= NULL;
- set_killed(KILL_CONNECTION);
-
- mysql_mutex_lock(&LOCK_thd_data);
+ mysql_mutex_assert_owner(&LOCK_thd_data);
+ mysql_mutex_assert_owner(&LOCK_thd_kill);
+ set_killed_no_mutex(KILL_CONNECTION);
#ifdef SIGNAL_WITH_VIO_CLOSE
/*
@@ -1999,8 +1999,6 @@ void THD::disconnect()
if (net.vio != vio)
vio_close(net.vio);
net.thd= 0; // Don't collect statistics
-
- mysql_mutex_unlock(&LOCK_thd_data);
}
@@ -2027,6 +2025,9 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
if (needs_thr_lock_abort)
{
+ /* Protect thread from concurrent disconnect and delete */
+ mysql_mutex_lock(&in_use->LOCK_thd_kill);
+ /* Protect thread from concurrent usage */
mysql_mutex_lock(&in_use->LOCK_thd_data);
/* If not already dying */
if (in_use->killed != KILL_CONNECTION_HARD)
@@ -2043,11 +2044,20 @@ bool THD::notify_shared_lock(MDL_context_owner *ctx_in_use,
thread can see those instances (e.g. see partitioning code).
*/
if (!thd_table->needs_reopen())
- {
signalled|= mysql_lock_abort_for_thread(this, thd_table);
- }
}
+#ifdef WITH_WSREP
+ if (WSREP(this) && wsrep_thd_is_BF(this, false))
+ {
+ WSREP_DEBUG("notify_shared_lock: BF thread %llu query %s"
+ " victim %llu query %s",
+ this->real_id, wsrep_thd_query(this),
+ in_use->real_id, wsrep_thd_query(in_use));
+ wsrep_abort_thd(this, in_use, false);
+ }
+#endif /* WITH_WSREP */
}
+ mysql_mutex_unlock(&in_use->LOCK_thd_kill);
mysql_mutex_unlock(&in_use->LOCK_thd_data);
}
DBUG_RETURN(signalled);
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 7570211f586..cf90f78ea30 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3336,8 +3336,15 @@ public:
void abort_current_cond_wait(bool force);
/** Disconnect the associated communication endpoint. */
- void disconnect();
-
+ inline void disconnect()
+ {
+ mysql_mutex_lock(&LOCK_thd_kill);
+ mysql_mutex_lock(&LOCK_thd_data);
+ disconnect_mutexed();
+ mysql_mutex_unlock(&LOCK_thd_kill);
+ mysql_mutex_unlock(&LOCK_thd_data);
+ }
+ void disconnect_mutexed();
/*
Allows this thread to serve as a target for others to schedule Async
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 8999397fee7..276608e036c 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -9332,6 +9332,18 @@ static
void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
{
uint error;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_DEBUG("sql_kill called");
+ if (thd->wsrep_applier)
+ {
+ WSREP_DEBUG("KILL in applying, bailing out here");
+ return;
+ }
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+ }
+#endif /* WITH_WSREP */
if (likely(!(error= kill_one_thread(thd, id, state, type))))
{
if (!thd->killed)
@@ -9341,6 +9353,11 @@ void sql_kill(THD *thd, longlong id, killed_state state, killed_type type)
}
else
my_error(error, MYF(0), id);
+#ifdef WITH_WSREP
+ return;
+ wsrep_error_label:
+ my_error(ER_CANNOT_USER, MYF(0), wsrep_thd_query(thd));
+#endif /* WITH_WSREP */
}
@@ -9349,6 +9366,18 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
{
uint error;
ha_rows rows;
+#ifdef WITH_WSREP
+ if (WSREP(thd))
+ {
+ WSREP_DEBUG("sql_kill_user called");
+ if (thd->wsrep_applier)
+ {
+ WSREP_DEBUG("KILL in applying, bailing out here");
+ return;
+ }
+ WSREP_TO_ISOLATION_BEGIN(WSREP_MYSQL_DB, NULL, NULL)
+ }
+#endif /* WITH_WSREP */
if (likely(!(error= kill_threads_for_user(thd, user, state, &rows))))
my_ok(thd, rows);
else
@@ -9359,6 +9388,11 @@ void sql_kill_user(THD *thd, LEX_USER *user, killed_state state)
*/
my_error(error, MYF(0), user->host.str, user->user.str);
}
+#ifdef WITH_WSREP
+ return;
+ wsrep_error_label:
+ my_error(ER_CANNOT_USER, MYF(0), user->user.str);
+#endif /* WITH_WSREP */
}
diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc
index 55564dbe235..92f1760cbff 100644
--- a/sql/wsrep_mysqld.cc
+++ b/sql/wsrep_mysqld.cc
@@ -2319,7 +2319,9 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
request_thd, granted_thd);
ticket->wsrep_report(wsrep_debug);
+ mysql_mutex_lock(&granted_thd->LOCK_thd_kill);
mysql_mutex_lock(&granted_thd->LOCK_thd_data);
+
if (wsrep_thd_is_toi(granted_thd) ||
wsrep_thd_is_applying(granted_thd))
{
@@ -2328,13 +2330,15 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
WSREP_DEBUG("BF thread waiting for SR in aborting state");
ticket->wsrep_report(wsrep_debug);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
}
else if (wsrep_thd_is_SR(granted_thd) && !wsrep_thd_is_SR(request_thd))
{
- WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR",
+ WSREP_MDL_LOG(INFO, "MDL conflict, DDL vs SR",
schema, schema_len, request_thd, granted_thd);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd(request_thd, granted_thd, 1);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
}
else
{
@@ -2342,6 +2346,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
request_thd, granted_thd);
ticket->wsrep_report(true);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
unireg_abort(1);
}
}
@@ -2351,14 +2356,16 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
WSREP_DEBUG("BF thread waiting for FLUSH");
ticket->wsrep_report(wsrep_debug);
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
}
else if (request_thd->lex->sql_command == SQLCOM_DROP_TABLE)
{
WSREP_DEBUG("DROP caused BF abort, conf %s",
wsrep_thd_transaction_state_str(granted_thd));
ticket->wsrep_report(wsrep_debug);
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd(request_thd, granted_thd, 1);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
}
else
{
@@ -2367,8 +2374,9 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
ticket->wsrep_report(wsrep_debug);
if (granted_thd->wsrep_trx().active())
{
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
wsrep_abort_thd(request_thd, granted_thd, 1);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
}
else
{
@@ -2376,10 +2384,11 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
Granted_thd is likely executing with wsrep_on=0. If the requesting
thd is BF, BF abort and wait.
*/
- mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
- if (wsrep_thd_is_BF(request_thd, FALSE))
+ if (wsrep_thd_is_BF(request_thd, false))
{
ha_abort_transaction(request_thd, granted_thd, TRUE);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
}
else
{
@@ -2392,15 +2401,15 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
}
}
else
- {
mysql_mutex_unlock(&request_thd->LOCK_thd_data);
- }
}
/**/
static bool abort_replicated(THD *thd)
{
bool ret_code= false;
+ mysql_mutex_assert_owner(&thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&thd->LOCK_thd_kill);
if (thd->wsrep_trx().state() == wsrep::transaction::s_committing)
{
WSREP_DEBUG("aborting replicated trx: %llu", (ulonglong)(thd->real_id));
@@ -2412,29 +2421,35 @@ static bool abort_replicated(THD *thd)
}
/**/
-static inline bool is_client_connection(THD *thd)
+static inline my_bool is_client_connection(THD *thd, my_bool *sync)
{
- return (thd->wsrep_client_thread && thd->variables.wsrep_on);
+ if (sync)
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ my_bool ret= (thd->wsrep_client_thread && thd->variables.wsrep_on);
+ if (sync)
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+
+ return ret;
}
-static inline bool is_replaying_connection(THD *thd)
+static inline my_bool is_replaying_connection(THD *thd, my_bool *sync)
{
- bool ret;
-
- mysql_mutex_lock(&thd->LOCK_thd_data);
- ret= (thd->wsrep_trx().state() == wsrep::transaction::s_replaying) ? true : false;
- mysql_mutex_unlock(&thd->LOCK_thd_data);
+ if (sync)
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ my_bool ret= ((thd->wsrep_trx().state() == wsrep::transaction::s_replaying) ? true : false);
+ if (sync)
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
return ret;
}
-static inline bool is_committing_connection(THD *thd)
+static inline my_bool is_committing_connection(THD *thd, my_bool *sync)
{
- bool ret;
-
- mysql_mutex_lock(&thd->LOCK_thd_data);
- ret= (thd->wsrep_trx().state() == wsrep::transaction::s_committing) ? true : false;
- mysql_mutex_unlock(&thd->LOCK_thd_data);
+ if (sync) /* non-NULL sync: take LOCK_thd_data around the state read */
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ my_bool ret= ((thd->wsrep_trx().state() == wsrep::transaction::s_committing) ? true : false);
+ if (sync) /* release the mutex taken above; this must be an unlock, not a second lock */
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
return ret;
}
@@ -2443,12 +2458,17 @@ static my_bool have_client_connections(THD *thd, void*)
{
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
(longlong) thd->thread_id));
- if (is_client_connection(thd) && thd->killed == KILL_CONNECTION)
+ my_bool ret=false;
+ mysql_mutex_lock(&thd->LOCK_thd_kill);
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ if (is_client_connection(thd, NULL) && thd->killed == KILL_CONNECTION)
{
(void)abort_replicated(thd);
- return 1;
+ ret= true;
}
- return 0;
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+ mysql_mutex_unlock(&thd->LOCK_thd_kill);
+ return ret;
}
static void wsrep_close_thread(THD *thd)
@@ -2460,59 +2480,72 @@ static void wsrep_close_thread(THD *thd)
mysql_mutex_unlock(&thd->LOCK_thd_kill);
}
-static my_bool have_committing_connections(THD *thd, void *)
+static my_bool have_committing_connections(THD *thd, my_bool *sync)
{
- return is_client_connection(thd) && is_committing_connection(thd) ? 1 : 0;
+ my_bool *need_sync= sync ? NULL : (my_bool *)thd;
+ if (sync)
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ my_bool ret= (is_client_connection(thd, need_sync) && is_committing_connection(thd, need_sync) ? 1 : 0);
+ if (sync)
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+ return ret;
}
int wsrep_wait_committing_connections_close(int wait_time)
{
int sleep_time= 100;
+ my_bool sync=true;
WSREP_DEBUG("wait for committing transaction to close: %d sleep: %d", wait_time, sleep_time);
- while (server_threads.iterate(have_committing_connections) && wait_time > 0)
+ while (server_threads.iterate(have_committing_connections, &sync) && wait_time > 0)
{
WSREP_DEBUG("wait for committing transaction to close: %d", wait_time);
my_sleep(sleep_time);
wait_time -= sleep_time;
}
- return server_threads.iterate(have_committing_connections);
+ return server_threads.iterate(have_committing_connections, &sync);
}
static my_bool kill_all_threads(THD *thd, THD *caller_thd)
{
DBUG_PRINT("quit", ("Informing thread %lld that it's time to die",
(longlong) thd->thread_id));
+ mysql_mutex_lock(&thd->LOCK_thd_kill);
+ mysql_mutex_lock(&thd->LOCK_thd_data);
/* We skip slave threads & scheduler on this first loop through. */
- if (is_client_connection(thd) && thd != caller_thd)
+ if (is_client_connection(thd, NULL) && thd != caller_thd)
{
- if (is_replaying_connection(thd))
- thd->set_killed(KILL_CONNECTION);
+ if (is_replaying_connection(thd,NULL))
+ thd->set_killed_no_mutex(KILL_CONNECTION);
else if (!abort_replicated(thd))
{
/* replicated transactions must be skipped */
WSREP_DEBUG("closing connection %lld", (longlong) thd->thread_id);
/* instead of wsrep_close_thread() we do now soft kill by THD::awake */
- thd->awake(KILL_CONNECTION);
+ thd->awake_no_mutex(KILL_CONNECTION);
}
}
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+ mysql_mutex_unlock(&thd->LOCK_thd_kill);
return 0;
}
static my_bool kill_remaining_threads(THD *thd, THD *caller_thd)
{
-#ifndef __bsdi__ // Bug in BSDI kernel
- if (is_client_connection(thd) &&
+ mysql_mutex_lock(&thd->LOCK_thd_kill);
+ mysql_mutex_lock(&thd->LOCK_thd_data);
+ if (is_client_connection(thd,NULL) &&
!abort_replicated(thd) &&
- !is_replaying_connection(thd) &&
+ !is_replaying_connection(thd,NULL) &&
thd_is_connection_alive(thd) &&
thd != caller_thd)
{
WSREP_INFO("killing local connection: %lld", (longlong) thd->thread_id);
- close_connection(thd);
+ close_connection(thd, true);
}
-#endif
+ mysql_mutex_unlock(&thd->LOCK_thd_data);
+ mysql_mutex_unlock(&thd->LOCK_thd_kill);
return 0;
}
diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc
index 2e02110d697..7eb08321905 100644
--- a/sql/wsrep_thd.cc
+++ b/sql/wsrep_thd.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013 Codership Oy <info@codership.com>
+/* Copyright (C) 2013-2021 Codership Oy <info@codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -64,7 +64,7 @@ static void wsrep_replication_process(THD *thd,
delete thd->wsrep_rgi->rli->mi;
delete thd->wsrep_rgi->rli;
-
+
thd->wsrep_rgi->cleanup_after_session();
delete thd->wsrep_rgi;
thd->wsrep_rgi= NULL;
@@ -314,8 +314,8 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
THD *victim_thd= (THD *) victim_thd_ptr;
THD *bf_thd= (THD *) bf_thd_ptr;
- mysql_mutex_lock(&victim_thd->LOCK_thd_data);
-
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
/* Note that when you use RSU node is desynced from cluster, thus WSREP(thd)
might not be true.
*/
@@ -327,16 +327,13 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
{
WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ?
(long long)bf_thd->real_id : 0, (long long)victim_thd->real_id);
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
ha_abort_transaction(bf_thd, victim_thd, signal);
- mysql_mutex_lock(&victim_thd->LOCK_thd_data);
}
else
{
WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
}
- mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
DBUG_RETURN(1);
}
@@ -345,6 +342,9 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
WSREP_LOG_THD(bf_thd, "BF aborter before");
WSREP_LOG_THD(victim_thd, "victim before");
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
+ mysql_mutex_assert_owner(&victim_thd->LOCK_thd_kill);
+
DBUG_EXECUTE_IF("sync.wsrep_bf_abort",
{
const char act[]=
@@ -358,7 +358,7 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active())
{
WSREP_DEBUG("wsrep_bf_abort, BF abort for non active transaction");
- switch (victim_thd->wsrep_trx().state())
+ switch (victim_thd->wsrep_trx().state())
{
case wsrep::transaction::s_aborting: /* fall through */
case wsrep::transaction::s_aborted:
@@ -367,7 +367,14 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
default:
break;
}
+
+ /* Test: galera_create_table_as_select. Here we enter wsrep-lib
+ where LOCK_thd_data will be acquired, thus we need to release it.
+ However, we can still hold LOCK_thd_kill to protect from
+ disconnect or delete. */
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
wsrep_start_transaction(victim_thd, victim_thd->wsrep_next_trx_id());
+ mysql_mutex_lock(&victim_thd->LOCK_thd_data);
}
bool ret;
@@ -375,12 +382,25 @@ bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
if (wsrep_thd_is_toi(bf_thd))
{
+ /* Here we enter wsrep-lib where LOCK_thd_data will be acquired,
+ thus we need to release it. However, we can still hold
+ LOCK_thd_kill to protect from disconnect or delete. */
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno);
+ mysql_mutex_lock(&victim_thd->LOCK_thd_data);
}
else
{
+ /* Test: mysql-wsrep-features#165. Here we enter wsrep-lib
+ where LOCK_thd_data will be acquired and later LOCK_thd_kill,
+ thus we need to release them. */
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
ret= victim_thd->wsrep_cs().bf_abort(bf_seqno);
+ mysql_mutex_lock(&victim_thd->LOCK_thd_kill);
+ mysql_mutex_lock(&victim_thd->LOCK_thd_data);
}
+
if (ret)
{
wsrep_bf_aborts_counter++;
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index c4368c80c9b..9ce3790b14a 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -5185,7 +5185,7 @@ static void innobase_kill_query(handlerton*, THD *thd, enum thd_kill_levels)
{
ut_ad(trx->mysql_thd == thd);
#ifdef WITH_WSREP
- if (trx->is_wsrep() && wsrep_thd_is_aborting(thd))
+ if (wsrep_thd_is_aborting(thd) || trx->lock.was_chosen_as_wsrep_victim)
/* if victim has been signaled by BF thread and/or aborting is already
progressing, following query aborting is not necessary any more.
Also, BF thread should own trx mutex for the victim. */
@@ -18637,12 +18637,6 @@ static struct st_mysql_storage_engine innobase_storage_engine=
#ifdef WITH_WSREP
-struct bg_wsrep_kill_trx_arg {
- my_thread_id thd_id, bf_thd_id;
- trx_id_t trx_id, bf_trx_id;
- bool signal;
-};
-
/** This function is used to kill one transaction.
This transaction was open on this node (not-yet-committed), and a
@@ -18666,13 +18660,11 @@ comparison as in the local certification failure.
@param[in] bf_thd Brute force (BF) thread
@param[in,out] victim_trx Vimtim trx to be killed
@param[in] signal Should victim be signaled */
-UNIV_INTERN
void
wsrep_innobase_kill_one_trx(
- THD* bf_thd,
+ MYSQL_THD const bf_thd,
trx_t *victim_trx,
- bool signal)
-
+ my_bool signal)
{
ut_ad(bf_thd);
ut_ad(victim_trx);
@@ -18680,9 +18672,14 @@ wsrep_innobase_kill_one_trx(
ut_ad(trx_mutex_own(victim_trx));
DBUG_ENTER("wsrep_innobase_kill_one_trx");
-
THD *thd= (THD *) victim_trx->mysql_thd;
- ut_ad(thd);
+
+ if (!thd)
+ {
+ WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
+ DBUG_VOID_RETURN;
+ }
+
/* Note that bf_trx might not exist here e.g. on MDL conflict
case (test: galera_concurrent_ctas). Similarly, BF thread
could be also acquiring MDL-lock causing victim to be
@@ -18691,31 +18688,37 @@ wsrep_innobase_kill_one_trx(
trx_t* bf_trx= thd_to_trx(bf_thd);
DBUG_ASSERT(wsrep_on(bf_thd));
- WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
+ /* Here we need to lock THD::LOCK_thd_data to protect from
+ concurrent usage or disconnect or delete. */
+ DEBUG_SYNC(bf_thd, "wsrep_before_BF_victim_lock");
+ wsrep_thd_LOCK(thd);
+ DEBUG_SYNC(bf_thd, "wsrep_after_BF_victim_lock");
WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld "
- "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
- "query: %s",
- wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
- bf_trx ? bf_trx->id : TRX_ID_MAX,
- thd_get_thread_id(bf_thd),
- wsrep_thd_trx_seqno(bf_thd),
- wsrep_thd_client_state_str(bf_thd),
- wsrep_thd_client_mode_str(bf_thd),
- wsrep_thd_transaction_state_str(bf_thd),
- wsrep_thd_query(bf_thd));
+ "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
+ "query: %s",
+ wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal",
+ bf_trx ? bf_trx->id : TRX_ID_MAX,
+ thd_get_thread_id(bf_thd),
+ wsrep_thd_trx_seqno(bf_thd),
+ wsrep_thd_client_state_str(bf_thd),
+ wsrep_thd_client_mode_str(bf_thd),
+ wsrep_thd_transaction_state_str(bf_thd),
+ wsrep_thd_query(bf_thd));
WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld "
- "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
- "query: %s",
- wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
- victim_trx->id,
- thd_get_thread_id(thd),
- wsrep_thd_trx_seqno(thd),
- wsrep_thd_client_state_str(thd),
- wsrep_thd_client_mode_str(thd),
- wsrep_thd_transaction_state_str(thd),
- wsrep_thd_query(thd));
+ "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s "
+ "query: %s",
+ wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
+ victim_trx->id,
+ thd_get_thread_id(thd),
+ wsrep_thd_trx_seqno(thd),
+ wsrep_thd_client_state_str(thd),
+ wsrep_thd_client_mode_str(thd),
+ wsrep_thd_transaction_state_str(thd),
+ wsrep_thd_query(thd));
+
+ WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
/* Mark transaction as a victim for Galera abort */
victim_trx->lock.was_chosen_as_wsrep_victim= true;
@@ -18741,6 +18744,8 @@ wsrep_innobase_kill_one_trx(
}
}
+ wsrep_thd_UNLOCK(thd);
+
DBUG_VOID_RETURN;
}
@@ -18764,33 +18769,61 @@ wsrep_abort_transaction(
DBUG_ENTER("wsrep_abort_transaction");
ut_ad(bf_thd);
ut_ad(victim_thd);
+ trx_t* victim_trx= thd_to_trx(victim_thd);
+ trx_t* bf_trx= thd_to_trx(bf_thd);
- trx_t* victim_trx = thd_to_trx(victim_thd);
-
- WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %s",
- wsrep_thd_query(bf_thd),
- wsrep_thd_query(victim_thd),
- wsrep_thd_transaction_state_str(victim_thd));
+ /* Here we should hold THD::LOCK_thd_data to protect
+ victim from concurrent usage and THD::LOCK_thd_kill
+ to protect from disconnect or delete. */
+ WSREP_DEBUG("wsrep_abort_transaction: BF:"
+ " thread %ld client_state %s client_mode %s"
+ " trx_state %s query %s trx " TRX_ID_FMT,
+ thd_get_thread_id(bf_thd),
+ wsrep_thd_client_state_str(bf_thd),
+ wsrep_thd_client_mode_str(bf_thd),
+ wsrep_thd_transaction_state_str(bf_thd),
+ wsrep_thd_query(bf_thd),
+ bf_trx ? bf_trx->id : 0);
+
+ WSREP_DEBUG("wsrep_abort_transaction: victim:"
+ " thread %ld query_state %s conflict_state %s"
+ " exec %s query %s trx " TRX_ID_FMT,
+ thd_get_thread_id(victim_thd),
+ wsrep_thd_client_state_str(victim_thd),
+ wsrep_thd_client_mode_str(victim_thd),
+ wsrep_thd_transaction_state_str(victim_thd),
+ wsrep_thd_query(victim_thd),
+ victim_trx ? victim_trx->id : 0);
if (victim_trx) {
+ WSREP_DEBUG("wsrep_abort_transaction: Victim thread %ld "
+ "transaction " TRX_ID_FMT " trx_state %d",
+ thd_get_thread_id(victim_thd),
+ victim_trx->id,
+ victim_trx->state);
+ /* This is necessary as correct mutexing order is
+ lock_sys -> trx -> THD::LOCK_thd_data and below
+ function assumes we have lock_sys and trx locked
+ and takes THD::LOCK_thd_data for THD state check. */
+ wsrep_thd_UNLOCK(victim_thd);
+ DEBUG_SYNC(bf_thd, "wsrep_abort_victim_unlocked");
+ DBUG_EXECUTE_IF("wsrep_abort_replicated_sleep",
+ WSREP_DEBUG("wsrep_abort_transaction: sleeping "
+ "for thread %ld ",
+ thd_get_thread_id(victim_thd));
+ my_sleep(100000););
lock_mutex_enter();
trx_mutex_enter(victim_trx);
wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal);
trx_mutex_exit(victim_trx);
lock_mutex_exit();
-
wsrep_srv_conc_cancel_wait(victim_trx);
+ wsrep_thd_LOCK(victim_thd);
DBUG_VOID_RETURN;
} else {
- DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort",
- {
- const char act[]=
- "now "
- "SIGNAL sync.before_wsrep_thd_abort_reached "
- "WAIT_FOR signal.before_wsrep_thd_abort";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
+ WSREP_DEBUG("wsrep_abort_transaction: Victim thread %ld "
+ "no transaction",
+ thd_get_thread_id(victim_thd));
wsrep_thd_bf_abort(bf_thd, victim_thd, signal);
}
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 108f6925ef7..482df3b1dd7 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -230,12 +230,10 @@ innobase_casedn_str(
char* a); /*!< in/out: string to put in lower case */
#ifdef WITH_WSREP
-UNIV_INTERN
void
-wsrep_innobase_kill_one_trx(
- THD* bf_thd,
- trx_t *victim_trx,
- bool signal);
+wsrep_innobase_kill_one_trx(MYSQL_THD const thd_ptr,
+ trx_t *victim_trx,
+ my_bool signal);
ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
unsigned char* str, unsigned int str_length,
diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc
index 59405e4c1ad..ae909619bf9 100644
--- a/storage/innobase/lock/lock0wait.cc
+++ b/storage/innobase/lock/lock0wait.cc
@@ -186,11 +186,7 @@ as a side effect trigger lock monitor
@param[in] trx transaction owning the lock
@param[in] locked true if trx and lock_sys.mutex is ownd
@return false for regular lock timeout */
-static
-bool
-wsrep_is_BF_lock_timeout(
- const trx_t* trx,
- bool locked = true)
+static bool wsrep_is_BF_lock_timeout(const trx_t* trx)
{
bool long_wait= (trx->error_state != DB_DEADLOCK &&
trx->is_wsrep() &&
@@ -203,19 +199,6 @@ wsrep_is_BF_lock_timeout(
if (long_wait) {
ib::info() << "WSREP: BF lock wait long for trx:" << trx->id
<< " query: " << wsrep_thd_query(trx->mysql_thd);
-
- if (!locked)
- lock_mutex_enter();
-
- ut_ad(lock_mutex_own());
-
- trx_print_latched(stderr, trx, 3000);
- /* Note this will release lock_sys mutex */
- lock_print_info_all_transactions(stderr);
-
- if (locked)
- lock_mutex_enter();
-
return was_wait;
} else
return false;
@@ -404,7 +387,7 @@ lock_wait_suspend_thread(
&& wait_time > (double) lock_wait_timeout
#ifdef WITH_WSREP
&& (!trx->is_wsrep()
- || (!wsrep_is_BF_lock_timeout(trx, false)
+ || (!wsrep_is_BF_lock_timeout(trx)
&& trx->error_state != DB_DEADLOCK))
#endif /* WITH_WSREP */
) {
diff --git a/storage/innobase/srv/srv0conc.cc b/storage/innobase/srv/srv0conc.cc
index 6167c8daeba..b8a2b4e788a 100644
--- a/storage/innobase/srv/srv0conc.cc
+++ b/storage/innobase/srv/srv0conc.cc
@@ -118,11 +118,15 @@ srv_conc_enter_innodb_with_atomics(
for (;;) {
ulint sleep_in_us;
#ifdef WITH_WSREP
- if (trx->is_wsrep() && wsrep_thd_is_aborting(trx->mysql_thd)) {
- if (UNIV_UNLIKELY(wsrep_debug)) {
- ib::info() <<
- "srv_conc_enter due to MUST_ABORT";
+ if (trx->is_wsrep()) {
+ wsrep_thd_LOCK(trx->mysql_thd);
+ if (wsrep_thd_is_aborting(trx->mysql_thd)) {
+ if (UNIV_UNLIKELY(wsrep_debug)) {
+ ib::info() <<
+ "srv_conc_enter due to MUST_ABORT";
+ }
}
+ wsrep_thd_UNLOCK(trx->mysql_thd);
srv_conc_force_enter_innodb(trx);
return;
}