diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2020-12-15 17:45:19 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2020-12-15 17:56:18 +0200 |
commit | cf2480dd77a45cf56389015a1eabf567846c0658 (patch) | |
tree | 728e51ba05a7c6fbb6ca40a2b89252783a93a531 | |
parent | ff5d306e296350e7489dd3decb01bad18d135411 (diff) | |
download | mariadb-git-cf2480dd77a45cf56389015a1eabf567846c0658.tar.gz |
MDEV-21452: Retain the watchdog only on dict_sys.mutex, for performancebb-10.6-MDEV-21452-2
Most hangs seem to involve dict_sys.mutex. While holding lock_sys.mutex
we rarely acquire any buffer pool page latches, which are a frequent
source of potential hangs.
-rw-r--r-- | mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result | 33 | ||||
-rw-r--r-- | mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test | 77 | ||||
-rw-r--r-- | storage/innobase/include/lock0lock.h | 17 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 35 | ||||
-rw-r--r-- | storage/innobase/row/row0mysql.cc | 2 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 14 |
6 files changed, 30 insertions, 148 deletions
diff --git a/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result b/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result index b3ab6fbc1da..50fbc1bd83b 100644 --- a/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result +++ b/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result @@ -1,25 +1,14 @@ connect con1,localhost,root,,; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; +SET DEBUG_SYNC='create_table SIGNAL stuck WAIT_FOR ever'; +CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB; connect con2,localhost,root,,; -drop table if exists t1; -connection con1; -create table t1 (id integer, x integer) engine = InnoDB; -insert into t1 values(0, 0); -set DEBUG_DBUG='+d,fatal-semaphore-timeout'; -set autocommit=0; -# Sending query on con1, -# the session will hold lock table mutex and sleep -SELECT * from t1 where id = 0 FOR UPDATE; -connection con2; -set autocommit=0; -# Sending query on con2, -# the session will be blocked on the lock table mutex and -# thus be put into sync arry -SELECT * from t1 where id = 0 FOR UPDATE; +SET DEBUG_SYNC='now WAIT_FOR stuck'; +FLUSH TABLES; +SELECT * FROM t1; connection default; -# Waitting for mysqld to crash -# Mysqld crash was detected -# Waitting for reconnect after mysqld restarts -# Reconnected after mysqld was successfully restarted -# Cleaning up before exit -drop table if exists t1; -# Clean exit +# Waiting for mariadbd to crash +# Crash was detected +DROP TABLE t1, t2; +Warnings: +Warning 1932 Table 'test.t2' doesn't exist in engine diff --git a/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test b/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test index e9dd0c71936..dc28498675d 100644 --- a/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test +++ b/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test @@ -1,5 +1,5 @@ -# Only test in debug mode since DBUG_EXECUTE_IF is used --source include/have_debug.inc +--source include/have_debug_sync.inc # Can't test this with embedded server --source include/not_embedded.inc @@ -11,56 +11,17 @@ --source include/have_innodb.inc connect (con1,localhost,root,,); +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; + +SET DEBUG_SYNC='create_table SIGNAL stuck WAIT_FOR ever'; +send CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB; + connect (con2,localhost,root,,); +SET DEBUG_SYNC='now WAIT_FOR stuck'; +FLUSH TABLES; ---disable_warnings -drop table if exists t1; ---enable_warnings - -connection con1; -eval create table t1 (id integer, x integer) engine = InnoDB; -insert into t1 values(0, 0); - -# Enable the debug injection. -set DEBUG_DBUG='+d,fatal-semaphore-timeout'; -set autocommit=0; - -# The following query will hang for an hour since the debug injection -# code will sleep an hour after holding the lock table mutex ---echo # Sending query on con1, ---echo # the session will hold lock table mutex and sleep ---send -SELECT * from t1 where id = 0 FOR UPDATE; - -# To make sure con1 holding the lock table mutex and sleeping ---sleep 2 - -connection con2; -set autocommit=0; - -# The following query will be blocked on the lock table mutex held by -# con1 so it will be put into sync array. ---echo # Sending query on con2, ---echo # the session will be blocked on the lock table mutex and ---echo # thus be put into sync arry ---send -SELECT * from t1 where id = 0 FOR UPDATE; - -# Waitting for mysqld to abort due to fatal semaphore timeout. -# Please note that, in the master.opt file, the fatal timeout -# was set to 1 second, but in mysqld debug mode, this timeout -# value will be timed 10 because UNIV_DEBUG_VALGRIND is set -# (see sync_array_print_long_waits_low() in storage/innobase/sync/sync0arr.cc) -# so the actual timeout will be 1 * 10 = 10 seconds. Besides, -# mysqld will abort after detecting this fatal timeout 10 times in -# a loop with interval of 1 second (see srv_error_monitor_thread -# thread in torage/innobase/srv/srv0srv.cc), so mysqld will abort -# in 1 * 10 + 1 * 10 = 20 seconds after con2 being blocked on -# the lock table mutex. -# -# P.S. the default fatal sempahore timeout is 600 seconds, -# so mysqld will abort after 600 * 10 + 1 * 10 = 6010 seconds -# in debug mode and 600 + 1 * 10 = 610 seconds in release mode. +# The following query will be blocked on the dict_sys.mutex held by con1 +send SELECT * FROM t1; connection default; @@ -73,10 +34,10 @@ call mtr.add_suppression(".*"); # The crash is expected exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; ---echo # Waitting for mysqld to crash +--echo # Waiting for mariadbd to crash -# It will take 20 seconds to detect the long semaphore and mysqld to abort. -# This test will be treated as pass as long as mysqld crash/restart is dectected +# It may take 20 seconds to detect the long semaphore and mysqld to abort. +# This test will be treated as pass as long as mysqld crash/restart is detected # in 80 seconds. let $counter= 80; let $mysql_errno= 0; @@ -94,8 +55,7 @@ while (!$mysql_errno) --sleep 1 } ---echo # Mysqld crash was detected ---echo # Waitting for reconnect after mysqld restarts +--echo # Crash was detected enable_reconnect; connection default; @@ -105,11 +65,4 @@ connection default; # Call script that will poll the server waiting for it to be back online again source include/wait_until_connected_again.inc; ---echo # Reconnected after mysqld was successfully restarted - ---echo # Cleaning up before exit ---disable_warnings -drop table if exists t1; ---enable_warnings - ---echo # Clean exit +DROP TABLE t1, t2; diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 910db0ab8ec..e197f383201 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -706,15 +706,9 @@ class lock_sys_t { bool m_initialised; - /** The my_hrtime_coarse().val of the oldest mutex_lock_wait() start, or 0 */ - std::atomic<ulonglong> mutex_wait_start; - /** mutex proteting the locks */ MY_ALIGNED(CACHE_LINE_SIZE) mysql_mutex_t mutex; public: - /** Diagnostic message for exceeding the mutex_lock_wait() timeout */ - static const char fatal_msg[]; - /** record locks */ hash_table_t rec_hash; /** predicate locks for SPATIAL INDEX */ @@ -747,10 +741,6 @@ public: bool is_initialised() { return m_initialised; } -private: - /** Acquire lock_sys.mutex */ - ATTRIBUTE_NOINLINE void mutex_lock_wait(); -public: #ifdef HAVE_PSI_MUTEX_INTERFACE /** Try to acquire lock_sys.mutex */ ATTRIBUTE_NOINLINE int mutex_trylock(); @@ -762,7 +752,7 @@ public: /** Try to acquire lock_sys.mutex */ int mutex_trylock() { return mysql_mutex_trylock(&mutex); } /** Aqcuire lock_sys.mutex */ - void mutex_lock() { if (mutex_trylock()) mutex_lock_wait(); } + void mutex_lock() { mysql_mutex_lock(&mutex); } /** Release lock_sys.mutex */ void mutex_unlock() { mysql_mutex_unlock(&mutex); } #endif @@ -771,11 +761,6 @@ public: /** Assert that mutex_lock() has not been invoked */ void mutex_assert_unlocked() const { mysql_mutex_assert_not_owner(&mutex); } - /** @return the my_hrtime_coarse().val of the oldest mutex_lock_wait() start, - assuming that requests are served on a FIFO basis */ - ulonglong oldest_wait() const - { return mutex_wait_start.load(std::memory_order_relaxed); } - /** Wait for a lock to be granted */ void wait_lock(lock_t **lock, mysql_cond_t *cond) { while (*lock) mysql_cond_wait(cond, &mutex); } diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 1c78f963e02..66644865309 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -304,12 +304,6 @@ static bool lock_rec_validate_page(const buf_block_t *block, bool latched) /* The lock system */ lock_sys_t lock_sys; -/** Diagnostic message for exceeding the mutex_lock_wait() timeout */ -const char lock_sys_t::fatal_msg[]= - "innodb_fatal_semaphore_wait_threshold was exceeded for lock_sys.mutex. " - "Please refer to " - "https://mariadb.com/kb/en/how-to-produce-a-full-stack-trace-for-mysqld/"; - /** We store info on the latest deadlock error to this buffer. InnoDB Monitor will then fetch it and print */ static bool lock_deadlock_found = false; @@ -476,36 +470,12 @@ void lock_sys_t::create(ulint n_cells) timeout_timer_active = false; } -void lock_sys_t::mutex_lock_wait() -{ - ulonglong now= my_hrtime_coarse().val, old= 0; - if (mutex_wait_start.compare_exchange_strong - (old, now, std::memory_order_relaxed, std::memory_order_relaxed)) - { - mysql_mutex_lock(&mutex); - mutex_wait_start.store(0, std::memory_order_relaxed); - return; - } - - ut_ad(old); - /* We could have old > now due to our use of my_hrtime_coarse(). */ - ulong waited= old <= now ? static_cast<ulong>((now - old) / 1000000) : 0; - const ulong threshold= srv_fatal_semaphore_wait_threshold; - - if (waited >= threshold) - ib::fatal() << fatal_msg; - - if (waited > threshold / 4) - ib::warn() << "A long wait (" << waited - << " seconds) was observed for lock_sys.mutex"; - mysql_mutex_lock(&mutex); -} #ifdef HAVE_PSI_MUTEX_INTERFACE /** Try to acquire lock_sys.mutex */ int lock_sys_t::mutex_trylock() { return mysql_mutex_trylock(&mutex); } /** Acquire lock_sys.mutex */ -void lock_sys_t::mutex_lock() { if (mutex_trylock()) mutex_lock_wait(); } +void lock_sys_t::mutex_lock() { mysql_mutex_lock(&mutex); } /** Release lock_sys.mutex */ void lock_sys_t::mutex_unlock() { mysql_mutex_unlock(&mutex); } #endif @@ -3571,9 +3541,6 @@ lock_table( lock_sys.mutex_lock(); - DBUG_EXECUTE_IF("fatal-semaphore-timeout", - { os_thread_sleep(3600000000LL); }); - /* We have to check if the new lock is compatible with any locks other transactions have in the table lock queue. */ diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 5de60e2c862..d08d5662a69 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2304,6 +2304,8 @@ row_create_table_for_mysql( ut_d(dict_sys.assert_locked()); ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); + DEBUG_SYNC_C("create_table"); + DBUG_EXECUTE_IF( "ib_create_table_fail_at_start_of_row_create_table_for_mysql", goto err_exit; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 6a5e400015f..4fb2ffcc7bf 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1349,20 +1349,6 @@ void srv_monitor_task(void*) const ulonglong now = my_hrtime_coarse().val; const ulong threshold = srv_fatal_semaphore_wait_threshold; - if (ulonglong start = lock_sys.oldest_wait()) { - ulong waited = static_cast<ulong>((now - start) / 1000000); - if (waited >= threshold) { - ib::fatal() << lock_sys.fatal_msg; - } - - if (waited == threshold / 4 - || waited == threshold / 2 - || waited == threshold / 4 * 3) { - ib::warn() << "Long wait (" << waited - << " seconds) for lock_sys.mutex"; - } - } - if (ulonglong start = dict_sys.oldest_wait()) { ulong waited = static_cast<ulong>((now - start) / 1000000); if (waited >= threshold) { |