diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2020-11-21 11:38:42 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2020-11-23 07:43:33 +0200 |
commit | 7315a6a710b77ae18241cfa62626d1c46a7431bf (patch) | |
tree | f8d74197cc186efef14fd09e4942b02bb374ffac | |
parent | c3a868b1917266a46fd17808033a830ddb801aaf (diff) | |
download | mariadb-git-7315a6a710b77ae18241cfa62626d1c46a7431bf.tar.gz |
Remove rw_lock::read_lock_yield() to avoid starvation
The greedy fetch_add(1) approach of read_trylock() may cause
starvation of a waiting write lock request. Let us use a
compare-and-swap for the read lock acquisition in order to
guarantee the progress of writers.
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 16 | ||||
-rw-r--r-- | storage/innobase/include/rw_lock.h | 26 | ||||
-rw-r--r-- | storage/innobase/include/srw_lock.h | 12 | ||||
-rw-r--r-- | storage/innobase/sync/srw_lock_linux.cc | 37 |
4 files changed, 41 insertions, 50 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 176c357becc..a2928314d78 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -281,25 +281,17 @@ the read requests for the whole area. #ifndef UNIV_INNOCHECKSUM void page_hash_latch::read_lock_wait() { - auto l= read_lock_yield(); /* First, try busy spinning for a while. */ for (auto spin= srv_n_spin_wait_rounds; spin--; ) { - if (l & WRITER_PENDING) - ut_delay(srv_spin_wait_delay); + ut_delay(srv_spin_wait_delay); if (read_trylock()) return; - l= read_lock_yield(); } /* Fall back to yielding to other threads. */ - for (;;) - { - if (l & WRITER_PENDING) - os_thread_yield(); - if (read_trylock()) - return; - l= read_lock_yield(); - } + do + os_thread_yield(); + while (!read_trylock()); } void page_hash_latch::write_lock_wait() diff --git a/storage/innobase/include/rw_lock.h b/storage/innobase/include/rw_lock.h index 7caef8a7fa1..65831990f98 100644 --- a/storage/innobase/include/rw_lock.h +++ b/storage/innobase/include/rw_lock.h @@ -36,14 +36,6 @@ protected: /** Flag to indicate that write_lock() or write_lock_wait() is pending */ static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING; - /** Yield a read lock request due to a conflict with a write lock. - @return the lock value */ - uint32_t read_lock_yield() - { - uint32_t l= lock.fetch_sub(1, std::memory_order_relaxed); - DBUG_ASSERT(l & ~WRITER_PENDING); - return l; - } /** Start waiting for an exclusive lock. @return current value of the lock word */ uint32_t write_lock_wait_start() @@ -59,6 +51,21 @@ protected: return lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire, std::memory_order_relaxed); } + /** Try to acquire a shared lock. + @param l the value of the lock word + @return whether the lock was acquired */ + bool read_trylock(uint32_t &l) + { + l= UNLOCKED; + while (!lock.compare_exchange_strong(l, l + 1, std::memory_order_acquire, + std::memory_order_relaxed)) + { + DBUG_ASSERT(!(WRITER & l) || !(~WRITER_PENDING & l)); + if (l & WRITER_PENDING) + return false; + } + return true; + } /** Wait for an exclusive lock. @return whether the exclusive lock was acquired */ bool write_lock_poll() @@ -93,8 +100,7 @@ public: } /** Try to acquire a shared lock. @return whether the lock was acquired */ - bool read_trylock() - { return !(lock.fetch_add(1, std::memory_order_acquire) & WRITER_PENDING); } + bool read_trylock() { uint32_t l; return read_trylock(l); } /** Try to acquire an exclusive lock. @return whether the lock was acquired */ bool write_trylock() diff --git a/storage/innobase/include/srw_lock.h b/storage/innobase/include/srw_lock.h index ba587f72a9f..d81f78314f1 100644 --- a/storage/innobase/include/srw_lock.h +++ b/storage/innobase/include/srw_lock.h @@ -59,8 +59,9 @@ public: # else /** @return pointer to the lock word */ rw_lock *word() { return static_cast<rw_lock*>(this); } - /** Wait for a read lock after a failed read_trylock() */ - void read_lock(); + /** Wait for a read lock. + @param l lock word from a failed read_trylock() */ + void read_lock(uint32_t l); /** Wait for a write lock after a failed write_trylock() */ void write_lock(); # endif @@ -85,21 +86,22 @@ public: } void rd_lock() { + IF_WIN(, uint32_t l); # ifdef UNIV_PFS_RWLOCK - if (read_trylock()) + if (read_trylock(IF_WIN(, l))) return; if (pfs_psi) { PSI_rwlock_locker_state state; PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_rdwait) (&state, pfs_psi, PSI_RWLOCK_READLOCK, __FILE__, __LINE__); - read_lock(); + read_lock(IF_WIN(, l)); if (locker) PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0); return; } # endif /* UNIV_PFS_RWLOCK */ - IF_WIN(, if (!read_trylock())) read_lock(); + IF_WIN(read_lock(), if (!read_trylock(l)) read_lock(l)); } void wr_lock() { diff --git a/storage/innobase/sync/srw_lock_linux.cc b/storage/innobase/sync/srw_lock_linux.cc index b385775c5c2..98d7744e34a 100644 --- a/storage/innobase/sync/srw_lock_linux.cc +++ b/storage/innobase/sync/srw_lock_linux.cc @@ -31,37 +31,28 @@ int srw_lock_dummy_function() { return 0; } # include "srv0srv.h" -/** Wait for a read lock after a failed read_trylock() */ -void srw_lock::read_lock() +/** Wait for a read lock. +@param lock word value from a failed read_trylock() */ +void srw_lock::read_lock(uint32_t l) { - for (;;) + do { - uint32_t l= read_lock_yield() - 1; - if (l & WRITER_PENDING) - { - if (l == WRITER_WAITING) - wake_writer: - syscall(SYS_futex, word(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); - syscall(SYS_futex, word(), FUTEX_WAIT_PRIVATE, l, nullptr, nullptr, 0); - } + if (l == WRITER_WAITING) + wake_writer: + syscall(SYS_futex, word(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); else - { for (auto spin= srv_n_spin_wait_rounds; spin; spin--) { - if (read_trylock()) + ut_delay(srv_spin_wait_delay); + if (read_trylock(l)) return; - l= read_lock_yield() - 1; - if (l & WRITER_PENDING) - { - if (l == WRITER_WAITING) - goto wake_writer; - ut_delay(srv_spin_wait_delay); - } + else if (l == WRITER_WAITING) + goto wake_writer; } - } - if (read_trylock()) - return; + + syscall(SYS_futex, word(), FUTEX_WAIT_PRIVATE, l, nullptr, nullptr, 0); } + while (!read_trylock(l)); } /** Wait for a write lock after a failed write_trylock() */ |