summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2020-11-21 11:38:42 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2020-11-23 07:43:33 +0200
commit7315a6a710b77ae18241cfa62626d1c46a7431bf (patch)
treef8d74197cc186efef14fd09e4942b02bb374ffac
parentc3a868b1917266a46fd17808033a830ddb801aaf (diff)
downloadmariadb-git-7315a6a710b77ae18241cfa62626d1c46a7431bf.tar.gz
Remove rw_lock::read_lock_yield() to avoid starvation
The greedy fetch_add(1) approach of read_trylock() may cause starvation of a waiting write lock request. Let us use a compare-and-swap for the read lock acquisition in order to guarantee the progress of writers.
-rw-r--r--storage/innobase/buf/buf0buf.cc16
-rw-r--r--storage/innobase/include/rw_lock.h26
-rw-r--r--storage/innobase/include/srw_lock.h12
-rw-r--r--storage/innobase/sync/srw_lock_linux.cc37
4 files changed, 41 insertions, 50 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 176c357becc..a2928314d78 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -281,25 +281,17 @@ the read requests for the whole area.
#ifndef UNIV_INNOCHECKSUM
void page_hash_latch::read_lock_wait()
{
- auto l= read_lock_yield();
/* First, try busy spinning for a while. */
for (auto spin= srv_n_spin_wait_rounds; spin--; )
{
- if (l & WRITER_PENDING)
- ut_delay(srv_spin_wait_delay);
+ ut_delay(srv_spin_wait_delay);
if (read_trylock())
return;
- l= read_lock_yield();
}
/* Fall back to yielding to other threads. */
- for (;;)
- {
- if (l & WRITER_PENDING)
- os_thread_yield();
- if (read_trylock())
- return;
- l= read_lock_yield();
- }
+ do
+ os_thread_yield();
+ while (!read_trylock());
}
void page_hash_latch::write_lock_wait()
diff --git a/storage/innobase/include/rw_lock.h b/storage/innobase/include/rw_lock.h
index 7caef8a7fa1..65831990f98 100644
--- a/storage/innobase/include/rw_lock.h
+++ b/storage/innobase/include/rw_lock.h
@@ -36,14 +36,6 @@ protected:
/** Flag to indicate that write_lock() or write_lock_wait() is pending */
static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING;
- /** Yield a read lock request due to a conflict with a write lock.
- @return the lock value */
- uint32_t read_lock_yield()
- {
- uint32_t l= lock.fetch_sub(1, std::memory_order_relaxed);
- DBUG_ASSERT(l & ~WRITER_PENDING);
- return l;
- }
/** Start waiting for an exclusive lock.
@return current value of the lock word */
uint32_t write_lock_wait_start()
@@ -59,6 +51,21 @@ protected:
return lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire,
std::memory_order_relaxed);
}
+ /** Try to acquire a shared lock.
+ @param l the value of the lock word
+ @return whether the lock was acquired */
+ bool read_trylock(uint32_t &l)
+ {
+ l= UNLOCKED;
+ while (!lock.compare_exchange_strong(l, l + 1, std::memory_order_acquire,
+ std::memory_order_relaxed))
+ {
+ DBUG_ASSERT(!(WRITER & l) || !(~WRITER_PENDING & l));
+ if (l & WRITER_PENDING)
+ return false;
+ }
+ return true;
+ }
/** Wait for an exclusive lock.
@return whether the exclusive lock was acquired */
bool write_lock_poll()
@@ -93,8 +100,7 @@ public:
}
/** Try to acquire a shared lock.
@return whether the lock was acquired */
- bool read_trylock()
- { return !(lock.fetch_add(1, std::memory_order_acquire) & WRITER_PENDING); }
+ bool read_trylock() { uint32_t l; return read_trylock(l); }
/** Try to acquire an exclusive lock.
@return whether the lock was acquired */
bool write_trylock()
diff --git a/storage/innobase/include/srw_lock.h b/storage/innobase/include/srw_lock.h
index ba587f72a9f..d81f78314f1 100644
--- a/storage/innobase/include/srw_lock.h
+++ b/storage/innobase/include/srw_lock.h
@@ -59,8 +59,9 @@ public:
# else
/** @return pointer to the lock word */
rw_lock *word() { return static_cast<rw_lock*>(this); }
- /** Wait for a read lock after a failed read_trylock() */
- void read_lock();
+ /** Wait for a read lock.
+ @param l lock word from a failed read_trylock() */
+ void read_lock(uint32_t l);
/** Wait for a write lock after a failed write_trylock() */
void write_lock();
# endif
@@ -85,21 +86,22 @@ public:
}
void rd_lock()
{
+ IF_WIN(, uint32_t l);
# ifdef UNIV_PFS_RWLOCK
- if (read_trylock())
+ if (read_trylock(IF_WIN(, l)))
return;
if (pfs_psi)
{
PSI_rwlock_locker_state state;
PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)
(&state, pfs_psi, PSI_RWLOCK_READLOCK, __FILE__, __LINE__);
- read_lock();
+ read_lock(IF_WIN(, l));
if (locker)
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
return;
}
# endif /* UNIV_PFS_RWLOCK */
- IF_WIN(, if (!read_trylock())) read_lock();
+ IF_WIN(read_lock(), if (!read_trylock(l)) read_lock(l));
}
void wr_lock()
{
diff --git a/storage/innobase/sync/srw_lock_linux.cc b/storage/innobase/sync/srw_lock_linux.cc
index b385775c5c2..98d7744e34a 100644
--- a/storage/innobase/sync/srw_lock_linux.cc
+++ b/storage/innobase/sync/srw_lock_linux.cc
@@ -31,37 +31,28 @@ int srw_lock_dummy_function() { return 0; }
# include "srv0srv.h"
-/** Wait for a read lock after a failed read_trylock() */
-void srw_lock::read_lock()
+/** Wait for a read lock.
+@param lock word value from a failed read_trylock() */
+void srw_lock::read_lock(uint32_t l)
{
- for (;;)
+ do
{
- uint32_t l= read_lock_yield() - 1;
- if (l & WRITER_PENDING)
- {
- if (l == WRITER_WAITING)
- wake_writer:
- syscall(SYS_futex, word(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
- syscall(SYS_futex, word(), FUTEX_WAIT_PRIVATE, l, nullptr, nullptr, 0);
- }
+ if (l == WRITER_WAITING)
+ wake_writer:
+ syscall(SYS_futex, word(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
else
- {
for (auto spin= srv_n_spin_wait_rounds; spin; spin--)
{
- if (read_trylock())
+ ut_delay(srv_spin_wait_delay);
+ if (read_trylock(l))
return;
- l= read_lock_yield() - 1;
- if (l & WRITER_PENDING)
- {
- if (l == WRITER_WAITING)
- goto wake_writer;
- ut_delay(srv_spin_wait_delay);
- }
+ else if (l == WRITER_WAITING)
+ goto wake_writer;
}
- }
- if (read_trylock())
- return;
+
+ syscall(SYS_futex, word(), FUTEX_WAIT_PRIVATE, l, nullptr, nullptr, 0);
}
+ while (!read_trylock(l));
}
/** Wait for a write lock after a failed write_trylock() */