diff options
author | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-04-29 14:46:52 +0300 |
---|---|---|
committer | Jan Lindström <jan.lindstrom@mariadb.com> | 2020-05-06 09:27:12 +0300 |
commit | 21fa04ecf92570a5cbcd9d2cddda1e1ad790172c (patch) | |
tree | bc3dab94a91409666c8198d4f09c975db19ebb85 | |
parent | 2c3c851d2cba73825f81cd06220138b15c17ae4d (diff) | |
download | mariadb-git-bb-10.4-MDEV-21910-v2.tar.gz |
Analysis from 10.4bb-10.4-MDEV-21910-v2
BF abort:
wsrep_kill_victim (lock_sys->mutex, trx->mutex) => wsrep_kill_one_trx (thd->LOCK_thd_data)
KILL QUERY:
find_thread_by_id(thd->LOCK_thd_data , thd->LOCK_thd_kill) => thd->awake_no_mutex() => innobase_kill_query() => lock_trx_handle_wait( lock_sys->mutex, trx->mutex if not wsrep victim)
From these only lock_sys->mutex is global mutex. All others are either thread internal (thd) or transaction internal (trx). Therefore, mutex deadlock is possible if and only if thread we have selected as victim for BF abort and thread user is trying to kill are exactly the same one. While BF abort also will call thd->awake() it will have trx->lock.was_chosen_as_wsrep_victim=true and then we do not take lock_sys->mutex or trx->mutex at lock_trx_handle_wait(). Second problem is possible when in wsrep_kill_one_trx() we set trx->lock.was_chosen_as_wsrep_victim=true and release thd->LOCK_thd_data mutex. Now if we have a schedule where KILL is executed there is possibility that we either try to take lock_sys->mutex and have to wait or if this is same thread as victim no InnoDB mutexes are taken. Furthermore, in bf_abort() we might take thd->LOCK_thd_data again in wsrep-lib.
In this fix candidate we release lock_sys->mutex and trx->mutex before
we call wsrep_innobase_kill_one_trx(). Thus, it is safe to take
thd->LOCK_thd_data similarly as KILL does. We can remove
was_chosen_as_wsrep_victim variable from trx as it is not anymore
needed.
-rw-r--r-- | mysql-test/suite/galera/r/galera_bf_kill.result | 62 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/MW-286.cnf | 10 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_bf_kill.cnf | 10 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_bf_kill.test | 117 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_drop_database.cnf | 10 | ||||
-rw-r--r-- | mysql-test/suite/galera/t/galera_set_position_after_dummy_writeset.test | 1 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 163 | ||||
-rw-r--r-- | storage/innobase/include/ha_prototypes.h | 5 | ||||
-rw-r--r-- | storage/innobase/include/trx0trx.h | 5 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 28 | ||||
-rw-r--r-- | storage/innobase/trx/trx0roll.cc | 3 |
11 files changed, 318 insertions, 96 deletions
diff --git a/mysql-test/suite/galera/r/galera_bf_kill.result b/mysql-test/suite/galera/r/galera_bf_kill.result new file mode 100644 index 00000000000..4277a153d1d --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_kill.result @@ -0,0 +1,62 @@ +connection node_2; +connection node_1; +connection node_2; +CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; +insert into t1 values (NULL,1); +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a = 5; +connection node_2; +select * from t1; +a b +2 1 +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a =5; +connection node_2; +select * from t1; +a b +2 1 +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a =5, b=2; +connection node_2; +ALTER TABLE t1 ADD UNIQUE KEY b1(b); +ALTER TABLE t1 DROP KEY b1; +select * from t1; +a b +2 1 +disconnect node_2a; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +begin; +update t1 set a =5, b=2; +connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2b; +begin; +update t1 set a =6, b=7; +connection node_2; +ALTER TABLE t1 ADD UNIQUE KEY b2(b); +ALTER TABLE t1 DROP KEY b2; +select * from t1; +a b +2 1 +disconnect node_2a; +disconnect node_2b; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +connection node_2a; +SET SESSION wsrep_on=OFF; +begin; +update t1 set a =5, b=2; +connection node_2; +ALTER TABLE t1 ADD UNIQUE KEY b3(b); +select * from t1; +a b +2 1 +disconnect node_2a; +drop table t1; diff --git a/mysql-test/suite/galera/t/MW-286.cnf b/mysql-test/suite/galera/t/MW-286.cnf new file mode 100644 index 00000000000..94fe1a3b54a --- /dev/null +++ b/mysql-test/suite/galera/t/MW-286.cnf @@ -0,0 +1,10 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +wsrep-debug=SERVER +wsrep-log-conflicts=ON + +[mysqld.2] +wsrep-debug=SERVER +wsrep-log-conflicts=ON + diff --git a/mysql-test/suite/galera/t/galera_bf_kill.cnf b/mysql-test/suite/galera/t/galera_bf_kill.cnf new file mode 100644 index 00000000000..94fe1a3b54a --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_kill.cnf @@ -0,0 +1,10 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +wsrep-debug=SERVER +wsrep-log-conflicts=ON + +[mysqld.2] +wsrep-debug=SERVER +wsrep-log-conflicts=ON + diff --git a/mysql-test/suite/galera/t/galera_bf_kill.test b/mysql-test/suite/galera/t/galera_bf_kill.test new file mode 100644 index 00000000000..f23c2d1172a --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_kill.test @@ -0,0 +1,117 @@ +--source include/galera_cluster.inc + +# +# Test case 1: Start a transaction on node_2a and kill it +# from other connection on same node +# + +--connection node_2 +CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB; +insert into t1 values (NULL,1); + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a = 5; + +--connection node_2 + +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 +--source include/wait_condition.inc + +--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` + +--disable_query_log +--eval KILL $k_thread +--enable_query_log + +select * from t1; +--disconnect node_2a + +# +# Test case 2: Start a transaction on node_2a and use +# kill query from other connection on same node +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a =5; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1 +--source include/wait_condition.inc + +--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1` + +--disable_query_log +--eval KILL QUERY $k_thread +--enable_query_log + +select * from t1; +--disconnect node_2a +# +# Test case 3: Start a transaction on node_2a and start a DDL on other transaction +# that will then abort node_2a transaction +# +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a =5, b=2; + +--connection node_2 +ALTER TABLE t1 ADD UNIQUE KEY b1(b); +ALTER TABLE t1 DROP KEY b1; + +select * from t1; + +--disconnect node_2a + +# +# Test case 4: Start a transaction on node_2a and conflicting transaction on node_2b +# and start a DDL on other transaction that will then abort node_2a and node_2b +# transactions +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +begin; +update t1 set a =5, b=2; + +--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2b +begin; +send update t1 set a =6, b=7; + +--connection node_2 +ALTER TABLE t1 ADD UNIQUE KEY b2(b); +ALTER TABLE t1 DROP KEY b2; + +select * from t1; + +--disconnect node_2a +--disconnect node_2b + +# +# Test case 5: Start a transaction on node_2a with wsrep disabled +# and start a DDL on other transaction that will then abort node_2a +# transactions +# + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +--connection node_2a +SET SESSION wsrep_on=OFF; +begin; +update t1 set a =5, b=2; + +--connection node_2 +ALTER TABLE t1 ADD UNIQUE KEY b3(b); + +select * from t1; + +--disconnect node_2a + +drop table t1; + + + diff --git a/mysql-test/suite/galera/t/galera_drop_database.cnf b/mysql-test/suite/galera/t/galera_drop_database.cnf new file mode 100644 index 00000000000..94fe1a3b54a --- /dev/null +++ b/mysql-test/suite/galera/t/galera_drop_database.cnf @@ -0,0 +1,10 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +wsrep-debug=SERVER +wsrep-log-conflicts=ON + +[mysqld.2] +wsrep-debug=SERVER +wsrep-log-conflicts=ON + diff --git a/mysql-test/suite/galera/t/galera_set_position_after_dummy_writeset.test b/mysql-test/suite/galera/t/galera_set_position_after_dummy_writeset.test index f528b1435bb..47513f57b5c 100644 --- a/mysql-test/suite/galera/t/galera_set_position_after_dummy_writeset.test +++ b/mysql-test/suite/galera/t/galera_set_position_after_dummy_writeset.test @@ -5,6 +5,7 @@ --source include/galera_cluster.inc --source include/have_debug_sync.inc +--source include/ galera_have_debug_sync.inc --let $node_1=node_1 --let $node_2=node_2 diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4f11c50da9c..1e26c87ae12 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18663,98 +18663,120 @@ static struct st_mysql_storage_engine innobase_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; #ifdef WITH_WSREP -void -wsrep_abort_slave_trx( -/*==================*/ - wsrep_seqno_t bf_seqno, - wsrep_seqno_t victim_seqno) -{ - WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be " - "caused by:\n\t" - "1) unsupported configuration options combination, please check documentation.\n\t" - "2) a bug in the code.\n\t" - "3) a database corruption.\n Node consistency compromized, " - "need to abort. Restart the node to resync with cluster.", - (long long)bf_seqno, (long long)victim_seqno); - abort(); -} -/*******************************************************************//** -This function is used to kill one transaction in BF. */ + +/** This function is used to kill one transaction. + +This transaction was open on this node (not-yet-committed), and a +conflicting writeset from some other node that was being applied +caused a locking conflict. First committed (from other node) +wins, thus open transaction is rolled back. BF stands for +brute-force: any transaction can get aborted by galera any time +it is necessary. + +This conflict can happen only when the replicated writeset (from +other node) is being applied, not when it’s waiting in the queue. +If our local transaction reached its COMMIT and this conflicting +writeset was in the queue, then it should fail the local +certification test instead. + +A brute force abort is only triggered by a locking conflict +between a writeset being applied by an applier thread (slave thread) +and an open transaction on the node, not by a Galera writeset +comparison as in the local certification failure. + +@param[in] bf_thd Brute force (BF) thread +@param[in,out] victim_trx Vimtim trx to be killed +@param[in] signal Should victim be signaled */ UNIV_INTERN int wsrep_innobase_kill_one_trx( -/*========================*/ - void * const bf_thd_ptr, - const trx_t * const bf_trx, + THD* bf_thd, trx_t *victim_trx, - ibool signal) + my_bool signal) { - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(victim_trx)); - ut_ad(bf_thd_ptr); - ut_ad(victim_trx); + ut_ad(victim_trx); + ut_ad(!lock_mutex_own()); + ut_ad(!trx_mutex_own(victim_trx)); DBUG_ENTER("wsrep_innobase_kill_one_trx"); - THD *bf_thd = bf_thd_ptr ? (THD*) bf_thd_ptr : NULL; - THD *thd = (THD *) victim_trx->mysql_thd; - int64_t bf_seqno = (bf_thd) ? wsrep_thd_trx_seqno(bf_thd) : 0; - if (!thd) { - DBUG_PRINT("wsrep", ("no thd for conflicting lock")); - WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); - DBUG_RETURN(1); - } + THD *thd= (THD *) victim_trx->mysql_thd; + /* Note that bf_trx might not exists here e.g. on MDL conflict + case. See galera_concurrent_ctas test case */ + trx_t* bf_trx= thd_to_trx(bf_thd); - if (!bf_thd) { - DBUG_PRINT("wsrep", ("no BF thd for conflicting lock")); - WSREP_WARN("no BF THD for trx: " TRX_ID_FMT, - bf_trx ? bf_trx->id : 0); - DBUG_RETURN(1); - } - WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); + ut_ad(bf_thd); + ut_ad(thd); + + /* Note that now that we do not hold lock_sys->mutex or + trx->mutex. this is safe */ wsrep_thd_LOCK(thd); - WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF - "), victim: (%lu) trx: " TRX_ID_FMT, - signal, bf_seqno, - thd_get_thread_id(thd), - victim_trx->id); - - WSREP_DEBUG("Aborting query: %s conf %s trx: %lld", - (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void", - wsrep_thd_transaction_state_str(thd), - wsrep_thd_transaction_id(thd)); - - /* - * we mark with was_chosen_as_deadlock_victim transaction, - * which is already marked as BF victim - * lock_sys is held until this vicitm has aborted - */ - victim_trx->lock.was_chosen_as_wsrep_victim = TRUE; + WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); + + WSREP_DEBUG("Aborter %s trx_id: " TRX_ID_FMT " thread: %ld " + "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " + "query: %s", + wsrep_thd_is_BF(bf_thd, false) ? "BF" : "normal", + bf_trx ? bf_trx->id : TRX_ID_MAX, + thd_get_thread_id(bf_thd), + wsrep_thd_trx_seqno(bf_thd), + wsrep_thd_client_state_str(bf_thd), + wsrep_thd_client_mode_str(bf_thd), + wsrep_thd_transaction_state_str(bf_thd), + wsrep_thd_query(bf_thd)); + + WSREP_DEBUG("Victim %s trx_id: " TRX_ID_FMT " thread: %ld " + "seqno: %lld client_state: %s client_mode: %s transaction_mode: %s " + "query: %s", + wsrep_thd_is_BF(thd, false) ? "BF" : "normal", + victim_trx->id, + thd_get_thread_id(thd), + wsrep_thd_trx_seqno(thd), + wsrep_thd_client_state_str(thd), + wsrep_thd_client_mode_str(thd), + wsrep_thd_transaction_state_str(thd), + wsrep_thd_query(thd)); + + /* Note that we need to release this as it will be acquired + below in wsrep-lib */ wsrep_thd_UNLOCK(thd); + if (wsrep_thd_bf_abort(bf_thd, thd, signal)) { + /* Note that victim might not wait any locks but it + could hold them. Victim can be also selected + to be aborted in case of MDL conflict. */ if (victim_trx->lock.wait_lock) { WSREP_DEBUG("victim has wait flag: %lu", thd_get_thread_id(thd)); - lock_t* wait_lock = victim_trx->lock.wait_lock; - if (wait_lock) { - WSREP_DEBUG("canceling wait lock"); - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; - lock_cancel_waiting_and_release(wait_lock); - } + WSREP_DEBUG("canceling wait lock"); + lock_mutex_enter(); + trx_mutex_enter(victim_trx); + victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; + lock_cancel_waiting_and_release(victim_trx->lock.wait_lock); + trx_mutex_exit(victim_trx); + lock_mutex_exit(); } } DBUG_RETURN(0); } +/** + This function forces the victim transaction to abort. Aborting the + transaction does NOT end it, it still has to be rolled back. + + @param bf_thd brute force THD asking for the abort + @param victim_thd victim THD to be aborted + + @return 0 victim was aborted + @return -1 victim thread was aborted (no transaction) +*/ static int -wsrep_abort_transaction( -/*====================*/ - handlerton*, +wsrep_abort_transaction(handlerton*, THD *bf_thd, THD *victim_thd, my_bool signal) @@ -18762,7 +18784,8 @@ wsrep_abort_transaction( DBUG_ENTER("wsrep_innobase_abort_thd"); trx_t* victim_trx = thd_to_trx(victim_thd); - trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL; + + ut_ad(bf_thd); WSREP_DEBUG("abort transaction: BF: %s victim: %s victim conf: %s", wsrep_thd_query(bf_thd), @@ -18770,12 +18793,8 @@ wsrep_abort_transaction( wsrep_thd_transaction_state_str(victim_thd)); if (victim_trx) { - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - int rcode= wsrep_innobase_kill_one_trx(bf_thd, bf_trx, + int rcode= wsrep_innobase_kill_one_trx(bf_thd, victim_trx, signal); - trx_mutex_exit(victim_trx); - lock_mutex_exit(); wsrep_srv_conc_cancel_wait(victim_trx); DBUG_RETURN(rcode); } else { diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index cd7f5355818..38751138b4f 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -232,10 +232,9 @@ innobase_casedn_str( #ifdef WITH_WSREP UNIV_INTERN int -wsrep_innobase_kill_one_trx(void * const thd_ptr, - const trx_t * const bf_trx, +wsrep_innobase_kill_one_trx(THD* bf_thd, trx_t *victim_trx, - ibool signal); + my_bool signal); ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, unsigned char* str, unsigned int str_length, unsigned int buf_length); diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 87bf97f00cf..2bfd7aa94a7 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -549,11 +549,6 @@ struct trx_lock_t { lock_sys.mutex. Otherwise, this may only be modified by the thread that is serving the running transaction. */ -#ifdef WITH_WSREP - bool was_chosen_as_wsrep_victim; - /*!< high priority wsrep thread has - marked this trx to abort */ -#endif /* WITH_WSREP */ /** Pre-allocated record locks */ struct { diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 349dd01f904..f0e509d0c09 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1085,11 +1085,10 @@ wsrep_kill_victim( { ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(lock->trx)); - - /* quit for native mysql */ - if (!trx->is_wsrep()) return; + ut_ad(trx->is_wsrep()); if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { + trx_mutex_exit(lock->trx); return; } @@ -1106,6 +1105,7 @@ wsrep_kill_victim( } /* cannot release lock, until our lock is in the queue*/ + trx_mutex_exit(lock->trx); } else if (lock->trx != trx) { if (wsrep_log_conflicts) { ib::info() << "*** Priority TRANSACTION:"; @@ -1133,9 +1133,15 @@ wsrep_kill_victim( << wsrep_thd_query(lock->trx->mysql_thd); } - wsrep_innobase_kill_one_trx(trx->mysql_thd, - trx, lock->trx, TRUE); + lock->trx->reference(); + trx_mutex_exit(lock->trx); + lock_mutex_exit(); + wsrep_innobase_kill_one_trx(trx->mysql_thd, lock->trx, true); + lock_mutex_enter(); + lock->trx->release_reference(); } + } else { + trx_mutex_exit(lock->trx); } } #endif /* WITH_WSREP */ @@ -1175,7 +1181,8 @@ lock_rec_other_has_conflicting( or lock->trx depending on priority of the transaction. */ wsrep_kill_victim(const_cast<trx_t*>(trx), lock); - trx_mutex_exit(lock->trx); + ut_ad(lock_mutex_own()); + ut_ad(!trx_mutex_own(lock->trx)); } #endif /* WITH_WSREP */ return(lock); @@ -3827,7 +3834,8 @@ lock_table_other_has_incompatible( } trx_mutex_enter(lock->trx); wsrep_kill_victim((trx_t *)trx, (lock_t *)lock); - trx_mutex_exit(lock->trx); + ut_ad(lock_mutex_own()); + ut_ad(!trx_mutex_own(lock->trx)); } #endif /* WITH_WSREP */ @@ -6255,12 +6263,6 @@ lock_trx_handle_wait( /*=================*/ trx_t* trx) /*!< in/out: trx lock state */ { -#ifdef WITH_WSREP - /* We already own mutexes */ - if (trx->lock.was_chosen_as_wsrep_victim) { - return lock_trx_handle_wait_low(trx); - } -#endif /* WITH_WSREP */ lock_mutex_enter(); trx_mutex_enter(trx); dberr_t err = lock_trx_handle_wait_low(trx); diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 637f8b709f5..9c807c76baa 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -452,9 +452,6 @@ trx_rollback_to_savepoint_for_mysql_low( trx_mark_sql_stat_end(trx); trx->op_info = ""; -#ifdef WITH_WSREP - trx->lock.was_chosen_as_wsrep_victim = false; -#endif return(err); } |