summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2020-12-15 17:45:19 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2020-12-15 17:56:18 +0200
commitcf2480dd77a45cf56389015a1eabf567846c0658 (patch)
tree728e51ba05a7c6fbb6ca40a2b89252783a93a531
parentff5d306e296350e7489dd3decb01bad18d135411 (diff)
downloadmariadb-git-cf2480dd77a45cf56389015a1eabf567846c0658.tar.gz
MDEV-21452: Retain the watchdog only on dict_sys.mutex, for performancebb-10.6-MDEV-21452-2
Most hangs seem to involve dict_sys.mutex. While holding lock_sys.mutex we rarely acquire any buffer pool page latches, which are a frequent source of potential hangs.
-rw-r--r--mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result33
-rw-r--r--mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test77
-rw-r--r--storage/innobase/include/lock0lock.h17
-rw-r--r--storage/innobase/lock/lock0lock.cc35
-rw-r--r--storage/innobase/row/row0mysql.cc2
-rw-r--r--storage/innobase/srv/srv0srv.cc14
6 files changed, 30 insertions, 148 deletions
diff --git a/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result b/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result
index b3ab6fbc1da..50fbc1bd83b 100644
--- a/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result
+++ b/mysql-test/suite/sys_vars/r/innodb_fatal_semaphore_wait_threshold.result
@@ -1,25 +1,14 @@
connect con1,localhost,root,,;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+SET DEBUG_SYNC='create_table SIGNAL stuck WAIT_FOR ever';
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
connect con2,localhost,root,,;
-drop table if exists t1;
-connection con1;
-create table t1 (id integer, x integer) engine = InnoDB;
-insert into t1 values(0, 0);
-set DEBUG_DBUG='+d,fatal-semaphore-timeout';
-set autocommit=0;
-# Sending query on con1,
-# the session will hold lock table mutex and sleep
-SELECT * from t1 where id = 0 FOR UPDATE;
-connection con2;
-set autocommit=0;
-# Sending query on con2,
-# the session will be blocked on the lock table mutex and
-# thus be put into sync arry
-SELECT * from t1 where id = 0 FOR UPDATE;
+SET DEBUG_SYNC='now WAIT_FOR stuck';
+FLUSH TABLES;
+SELECT * FROM t1;
connection default;
-# Waitting for mysqld to crash
-# Mysqld crash was detected
-# Waitting for reconnect after mysqld restarts
-# Reconnected after mysqld was successfully restarted
-# Cleaning up before exit
-drop table if exists t1;
-# Clean exit
+# Waiting for mariadbd to crash
+# Crash was detected
+DROP TABLE t1, t2;
+Warnings:
+Warning 1932 Table 'test.t2' doesn't exist in engine
diff --git a/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test b/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test
index e9dd0c71936..dc28498675d 100644
--- a/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test
+++ b/mysql-test/suite/sys_vars/t/innodb_fatal_semaphore_wait_threshold.test
@@ -1,5 +1,5 @@
-# Only test in debug mode since DBUG_EXECUTE_IF is used
--source include/have_debug.inc
+--source include/have_debug_sync.inc
# Can't test this with embedded server
--source include/not_embedded.inc
@@ -11,56 +11,17 @@
--source include/have_innodb.inc
connect (con1,localhost,root,,);
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+
+SET DEBUG_SYNC='create_table SIGNAL stuck WAIT_FOR ever';
+send CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
+
connect (con2,localhost,root,,);
+SET DEBUG_SYNC='now WAIT_FOR stuck';
+FLUSH TABLES;
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-connection con1;
-eval create table t1 (id integer, x integer) engine = InnoDB;
-insert into t1 values(0, 0);
-
-# Enable the debug injection.
-set DEBUG_DBUG='+d,fatal-semaphore-timeout';
-set autocommit=0;
-
-# The following query will hang for an hour since the debug injection
-# code will sleep an hour after holding the lock table mutex
---echo # Sending query on con1,
---echo # the session will hold lock table mutex and sleep
---send
-SELECT * from t1 where id = 0 FOR UPDATE;
-
-# To make sure con1 holding the lock table mutex and sleeping
---sleep 2
-
-connection con2;
-set autocommit=0;
-
-# The following query will be blocked on the lock table mutex held by
-# con1 so it will be put into sync array.
---echo # Sending query on con2,
---echo # the session will be blocked on the lock table mutex and
---echo # thus be put into sync arry
---send
-SELECT * from t1 where id = 0 FOR UPDATE;
-
-# Waitting for mysqld to abort due to fatal semaphore timeout.
-# Please note that, in the master.opt file, the fatal timeout
-# was set to 1 second, but in mysqld debug mode, this timeout
-# value will be timed 10 because UNIV_DEBUG_VALGRIND is set
-# (see sync_array_print_long_waits_low() in storage/innobase/sync/sync0arr.cc)
-# so the actual timeout will be 1 * 10 = 10 seconds. Besides,
-# mysqld will abort after detecting this fatal timeout 10 times in
-# a loop with interval of 1 second (see srv_error_monitor_thread
-# thread in torage/innobase/srv/srv0srv.cc), so mysqld will abort
-# in 1 * 10 + 1 * 10 = 20 seconds after con2 being blocked on
-# the lock table mutex.
-#
-# P.S. the default fatal sempahore timeout is 600 seconds,
-# so mysqld will abort after 600 * 10 + 1 * 10 = 6010 seconds
-# in debug mode and 600 + 1 * 10 = 610 seconds in release mode.
+# The following query will be blocked on the dict_sys.mutex held by con1
+send SELECT * FROM t1;
connection default;
@@ -73,10 +34,10 @@ call mtr.add_suppression(".*");
# The crash is expected
exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
---echo # Waitting for mysqld to crash
+--echo # Waiting for mariadbd to crash
-# It will take 20 seconds to detect the long semaphore and mysqld to abort.
-# This test will be treated as pass as long as mysqld crash/restart is dectected
+# It may take 20 seconds to detect the long semaphore and mysqld to abort.
+# This test will be treated as pass as long as mysqld crash/restart is detected
# in 80 seconds.
let $counter= 80;
let $mysql_errno= 0;
@@ -94,8 +55,7 @@ while (!$mysql_errno)
--sleep 1
}
---echo # Mysqld crash was detected
---echo # Waitting for reconnect after mysqld restarts
+--echo # Crash was detected
enable_reconnect;
connection default;
@@ -105,11 +65,4 @@ connection default;
# Call script that will poll the server waiting for it to be back online again
source include/wait_until_connected_again.inc;
---echo # Reconnected after mysqld was successfully restarted
-
---echo # Cleaning up before exit
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
---echo # Clean exit
+DROP TABLE t1, t2;
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 910db0ab8ec..e197f383201 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -706,15 +706,9 @@ class lock_sys_t
{
bool m_initialised;
- /** The my_hrtime_coarse().val of the oldest mutex_lock_wait() start, or 0 */
- std::atomic<ulonglong> mutex_wait_start;
-
/** mutex proteting the locks */
MY_ALIGNED(CACHE_LINE_SIZE) mysql_mutex_t mutex;
public:
- /** Diagnostic message for exceeding the mutex_lock_wait() timeout */
- static const char fatal_msg[];
-
/** record locks */
hash_table_t rec_hash;
/** predicate locks for SPATIAL INDEX */
@@ -747,10 +741,6 @@ public:
bool is_initialised() { return m_initialised; }
-private:
- /** Acquire lock_sys.mutex */
- ATTRIBUTE_NOINLINE void mutex_lock_wait();
-public:
#ifdef HAVE_PSI_MUTEX_INTERFACE
/** Try to acquire lock_sys.mutex */
ATTRIBUTE_NOINLINE int mutex_trylock();
@@ -762,7 +752,7 @@ public:
/** Try to acquire lock_sys.mutex */
int mutex_trylock() { return mysql_mutex_trylock(&mutex); }
/** Aqcuire lock_sys.mutex */
- void mutex_lock() { if (mutex_trylock()) mutex_lock_wait(); }
+ void mutex_lock() { mysql_mutex_lock(&mutex); }
/** Release lock_sys.mutex */
void mutex_unlock() { mysql_mutex_unlock(&mutex); }
#endif
@@ -771,11 +761,6 @@ public:
/** Assert that mutex_lock() has not been invoked */
void mutex_assert_unlocked() const { mysql_mutex_assert_not_owner(&mutex); }
- /** @return the my_hrtime_coarse().val of the oldest mutex_lock_wait() start,
- assuming that requests are served on a FIFO basis */
- ulonglong oldest_wait() const
- { return mutex_wait_start.load(std::memory_order_relaxed); }
-
/** Wait for a lock to be granted */
void wait_lock(lock_t **lock, mysql_cond_t *cond)
{ while (*lock) mysql_cond_wait(cond, &mutex); }
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 1c78f963e02..66644865309 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -304,12 +304,6 @@ static bool lock_rec_validate_page(const buf_block_t *block, bool latched)
/* The lock system */
lock_sys_t lock_sys;
-/** Diagnostic message for exceeding the mutex_lock_wait() timeout */
-const char lock_sys_t::fatal_msg[]=
- "innodb_fatal_semaphore_wait_threshold was exceeded for lock_sys.mutex. "
- "Please refer to "
- "https://mariadb.com/kb/en/how-to-produce-a-full-stack-trace-for-mysqld/";
-
/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
static bool lock_deadlock_found = false;
@@ -476,36 +470,12 @@ void lock_sys_t::create(ulint n_cells)
timeout_timer_active = false;
}
-void lock_sys_t::mutex_lock_wait()
-{
- ulonglong now= my_hrtime_coarse().val, old= 0;
- if (mutex_wait_start.compare_exchange_strong
- (old, now, std::memory_order_relaxed, std::memory_order_relaxed))
- {
- mysql_mutex_lock(&mutex);
- mutex_wait_start.store(0, std::memory_order_relaxed);
- return;
- }
-
- ut_ad(old);
- /* We could have old > now due to our use of my_hrtime_coarse(). */
- ulong waited= old <= now ? static_cast<ulong>((now - old) / 1000000) : 0;
- const ulong threshold= srv_fatal_semaphore_wait_threshold;
-
- if (waited >= threshold)
- ib::fatal() << fatal_msg;
-
- if (waited > threshold / 4)
- ib::warn() << "A long wait (" << waited
- << " seconds) was observed for lock_sys.mutex";
- mysql_mutex_lock(&mutex);
-}
#ifdef HAVE_PSI_MUTEX_INTERFACE
/** Try to acquire lock_sys.mutex */
int lock_sys_t::mutex_trylock() { return mysql_mutex_trylock(&mutex); }
/** Acquire lock_sys.mutex */
-void lock_sys_t::mutex_lock() { if (mutex_trylock()) mutex_lock_wait(); }
+void lock_sys_t::mutex_lock() { mysql_mutex_lock(&mutex); }
/** Release lock_sys.mutex */
void lock_sys_t::mutex_unlock() { mysql_mutex_unlock(&mutex); }
#endif
@@ -3571,9 +3541,6 @@ lock_table(
lock_sys.mutex_lock();
- DBUG_EXECUTE_IF("fatal-semaphore-timeout",
- { os_thread_sleep(3600000000LL); });
-
/* We have to check if the new lock is compatible with any locks
other transactions have in the table lock queue. */
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 5de60e2c862..d08d5662a69 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -2304,6 +2304,8 @@ row_create_table_for_mysql(
ut_d(dict_sys.assert_locked());
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ DEBUG_SYNC_C("create_table");
+
DBUG_EXECUTE_IF(
"ib_create_table_fail_at_start_of_row_create_table_for_mysql",
goto err_exit;
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 6a5e400015f..4fb2ffcc7bf 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -1349,20 +1349,6 @@ void srv_monitor_task(void*)
const ulonglong now = my_hrtime_coarse().val;
const ulong threshold = srv_fatal_semaphore_wait_threshold;
- if (ulonglong start = lock_sys.oldest_wait()) {
- ulong waited = static_cast<ulong>((now - start) / 1000000);
- if (waited >= threshold) {
- ib::fatal() << lock_sys.fatal_msg;
- }
-
- if (waited == threshold / 4
- || waited == threshold / 2
- || waited == threshold / 4 * 3) {
- ib::warn() << "Long wait (" << waited
- << " seconds) for lock_sys.mutex";
- }
- }
-
if (ulonglong start = dict_sys.oldest_wait()) {
ulong waited = static_cast<ulong>((now - start) / 1000000);
if (waited >= threshold) {