author    Marko Mäkelä <marko.makela@mariadb.com>  2020-11-05 18:47:50 +0200
committer Marko Mäkelä <marko.makela@mariadb.com>  2020-11-05 18:47:50 +0200
commit    165dc04776c320d95a2ec16bedd4c18fa157262a (patch)
tree      2b0ecbe439896e8ff6e6ccb6d7dd7a5d72c48601
parent    f424eb974d2cf5fe875fb41129ee2e638c67eebe (diff)
download  mariadb-git-165dc04776c320d95a2ec16bedd4c18fa157262a.tar.gz
MDEV-24142 rw_lock_t has unnecessarily complex wait logic (bb-10.5-MDEV-24142)
Let us use a single rw_lock_t::wait_mutex and two condition variables
wait_cond, wait_ex_cond instead of two os_event_t (each of which wraps
a mutex, a condition variable, and some other data).

sync_array_wait_event(): Implement new waiting rules for rw_lock_t.

FIXME: Sometimes, rw_lock_x_lock_wait_func() hangs here, with
lock->lock_word == -X_LOCK_HALF_DECR. Perhaps we should treat not only
the values 0 and -X_LOCK_HALF_DECR but also X_LOCK_HALF_DECR specially?

rw_lock_x_unlock_func(): Clean up.
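In outline, the patch replaces the two os_event_t objects (each of which bundles its own mutex, condition variable, and signal counter) with a single mutex and two bare condition variables. Below is a minimal portable sketch of the waker side, with std::mutex and std::condition_variable standing in for mysql_mutex_t and mysql_cond_t; the field names follow the patch, everything else is illustrative rather than actual InnoDB code.

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>

struct rw_lock_sketch
{
  std::atomic<int32_t> lock_word{0x20000000}; // X_LOCK_DECR = unlocked
  std::atomic<uint32_t> waiters{0};           // 0=no waiters, 1=waiters exist
  std::mutex wait_mutex;                      // one mutex guards both queues
  std::condition_variable wait_cond;          // pending S, SX or X requests
  std::condition_variable wait_ex_cond;       // the enqueued exclusive waiter

  // Compare rw_lock_t::wakeup_waiters() in sync0rw.cc below: clear the
  // waiters flag under the mutex, then broadcast to every pending waiter.
  void wakeup_waiters()
  {
    std::lock_guard<std::mutex> g(wait_mutex);
    if (waiters.exchange(0))
      wait_cond.notify_all();
    // the real function also invokes sync_array_object_signalled()
  }

  // Compare rw_lock_t::wakeup_wait_ex(): at most one thread can be
  // enqueued for the exclusive lock, so a single signal suffices.
  void wakeup_wait_ex()
  {
    std::lock_guard<std::mutex> g(wait_mutex);
    wait_ex_cond.notify_one();
  }
};

The waiter side is sketched after the sync0arr.cc hunk below.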
-rw-r--r--  include/my_atomic_wrapper.h           4
-rw-r--r--  storage/innobase/include/sync0rw.h   21
-rw-r--r--  storage/innobase/include/sync0rw.ic  89
-rw-r--r--  storage/innobase/sync/sync0arr.cc    70
-rw-r--r--  storage/innobase/sync/sync0rw.cc     74
5 files changed, 144 insertions, 114 deletions
diff --git a/include/my_atomic_wrapper.h b/include/my_atomic_wrapper.h
index c0e18ea7c91..b256ba1069a 100644
--- a/include/my_atomic_wrapper.h
+++ b/include/my_atomic_wrapper.h
@@ -41,7 +41,9 @@ public:
Atomic_relaxed(Type val) : m(val) {}
Atomic_relaxed() {}
- operator Type() const { return m.load(std::memory_order_relaxed); }
+ Type load(std::memory_order o= std::memory_order_relaxed) const
+ { return m.load(o); }
+ operator Type() const { return load(); }
Type operator=(const Type val)
{ m.store(val, std::memory_order_relaxed); return val; }
Type operator=(const Atomic_relaxed<Type> &rhs) { return *this= Type{rhs}; }
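The new explicit load() is what lets the rest of the series request acquire ordering for lock_word reads (see sync_array_wait_event() below) while the implicit conversion stays relaxed. A hypothetical caller, assuming only this header:

#include "my_atomic_wrapper.h"
#include <atomic>
#include <cstdint>

int32_t read_lock_word(const Atomic_relaxed<int32_t> &lock_word)
{
  // implicit conversion: a relaxed load, exactly as before this patch
  int32_t relaxed= lock_word;
  (void) relaxed;
  // new overload: an acquire load that orders subsequent accesses
  return lock_word.load(std::memory_order_acquire);
}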
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index fce598f0930..090aa2a82fe 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -34,7 +34,6 @@ Created 9/11/1995 Heikki Tuuri
#ifndef sync0rw_h
#define sync0rw_h
-#include "os0event.h"
#include "ut0mutex.h"
#include "ilist.h"
@@ -572,7 +571,7 @@ struct rw_lock_t :
/** Holds the state of the lock. */
Atomic_relaxed<int32_t> lock_word;
- /** 0=no waiters, 1=waiters for X or SX lock exist */
+ /** 0=no waiters, 1=waiters exist */
Atomic_relaxed<uint32_t> waiters;
/** number of granted SX locks. */
@@ -586,12 +585,14 @@ struct rw_lock_t :
the lock_word. */
volatile os_thread_id_t writer_thread;
- /** Used by sync0arr.cc for thread queueing */
- os_event_t event;
-
- /** Event for next-writer to wait on. A thread must decrement
- lock_word before waiting. */
- os_event_t wait_ex_event;
+ /** Mutex to wait on conflict */
+ mysql_mutex_t wait_mutex;
+ /** Condition variable for an ordinary lock request (S, SX, X) */
+ mysql_cond_t wait_cond;
+ /** Condition variable for a successfully enqueued wait for an
+ exclusive lock. Subsequent requests must wait until it has been
+ granted and released. */
+ mysql_cond_t wait_ex_cond;
/** File name where lock created */
const char* cfile_name;
@@ -625,6 +626,10 @@ struct rw_lock_t :
/** Level in the global latching order. */
latch_level_t level;
#endif /* UNIV_DEBUG */
+ /** Wake up pending (not enqueued) waiters */
+ void wakeup_waiters();
+ /** Wake up the enqueued exclusive lock waiter */
+ void wakeup_wait_ex();
};
#ifdef UNIV_DEBUG
/** The structure for storing debug info of an rw-lock. All access to this
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 169cbdd9aa5..b2dc7ab6454 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -31,8 +31,6 @@ The read-write lock (for threads)
Created 9/11/1995 Heikki Tuuri
*******************************************************/
-#include "os0event.h"
-
/******************************************************************//**
Lock an rw-lock in shared mode for the current thread. If the rw-lock is
locked in exclusive mode, or there is an exclusive lock request waiting,
@@ -371,8 +369,7 @@ rw_lock_s_unlock_func(
/* wait_ex waiter exists. It may not be asleep, but we signal
anyway. We do not wake other waiters, because they can't
exist without wait_ex waiter and wait_ex waiter goes first.*/
- os_event_set(lock->wait_ex_event);
- sync_array_object_signalled();
+ lock->wakeup_wait_ex();
} else {
ut_ad(lock_word > -X_LOCK_DECR);
ut_ad(lock_word < X_LOCK_DECR);
@@ -393,44 +390,34 @@ rw_lock_x_unlock_func(
#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- int32_t lock_word = lock->lock_word;
-
- if (lock_word == 0) {
- /* Last caller in a possible recursive chain. */
- lock->writer_thread = 0;
- }
-
- ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
-
- if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
- /* Last X-lock owned by this thread, it may still hold SX-locks.
- ACQ_REL due to...
- RELEASE: we release rw-lock
- ACQUIRE: we want waiters to be loaded after lock_word is stored */
- lock->lock_word.fetch_add(X_LOCK_DECR,
- std::memory_order_acq_rel);
-
- /* This no longer has an X-lock but it may still have
- an SX-lock. So it is now free for S-locks by other threads.
- We need to signal read/write waiters.
- We do not need to signal wait_ex waiters, since they cannot
- exist when there is a writer. */
- if (lock->waiters) {
- lock->waiters = 0;
- os_event_set(lock->event);
- sync_array_object_signalled();
- }
- } else if (lock_word == -X_LOCK_DECR
- || lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
- /* There are 2 x-locks */
- lock->lock_word.fetch_add(X_LOCK_DECR);
- } else {
- /* There are more than 2 x-locks. */
- ut_ad(lock_word < -X_LOCK_DECR);
- lock->lock_word.fetch_add(1);
- }
-
- ut_ad(rw_lock_validate(lock));
+ ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
+
+ switch (int32_t lock_word= lock->lock_word) {
+ case 0:
+ /* Last caller in a possible recursive chain. */
+ lock->writer_thread= 0;
+ /* fall through */
+ case -X_LOCK_HALF_DECR:
+ /* Last X-lock owned by this thread, it may still hold SX-locks.
+ ACQ_REL due to...
+ RELEASE: we release rw-lock
+ ACQUIRE: we want waiters to be loaded after lock_word is stored */
+ lock->lock_word.fetch_add(X_LOCK_DECR, std::memory_order_acq_rel);
+ if (lock->waiters)
+ lock->wakeup_waiters();
+ break;
+ case -X_LOCK_DECR:
+ case -X_LOCK_DECR - X_LOCK_HALF_DECR:
+ /* There are 2 x-locks */
+ lock->lock_word.fetch_add(X_LOCK_DECR);
+ break;
+ default:
+ /* There are more than 2 x-locks. */
+ ut_ad(lock_word < -X_LOCK_DECR);
+ lock->lock_word.fetch_add(1);
+ }
+
+ ut_ad(rw_lock_validate(lock));
}
/******************************************************************//**
@@ -447,12 +434,9 @@ rw_lock_sx_unlock_func(
{
ut_ad(rw_lock_get_sx_lock_count(lock));
ut_ad(lock->sx_recursive > 0);
-
- --lock->sx_recursive;
-
ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_SX));
- if (lock->sx_recursive == 0) {
+ if (--lock->sx_recursive == 0) {
int32_t lock_word = lock->lock_word;
/* Last caller in a possible recursive chain. */
if (lock_word > 0) {
@@ -466,20 +450,21 @@ rw_lock_sx_unlock_func(
lock->lock_word.fetch_add(X_LOCK_HALF_DECR,
std::memory_order_acq_rel);
- /* Lock is now free. May have to signal read/write
- waiters. We do not need to signal wait_ex waiters,
+ /* We do not need to signal wait_ex waiters,
since they cannot exist when there is an sx-lock
holder. */
if (lock->waiters) {
- lock->waiters = 0;
- os_event_set(lock->event);
- sync_array_object_signalled();
+ lock->wakeup_waiters();
}
} else {
/* still has x-lock */
ut_ad(lock_word == -X_LOCK_HALF_DECR ||
lock_word <= -(X_LOCK_DECR + X_LOCK_HALF_DECR));
- lock->lock_word.fetch_add(X_LOCK_HALF_DECR);
+ switch (lock->lock_word.fetch_add(X_LOCK_HALF_DECR)) {
+ case -X_LOCK_HALF_DECR * 2:
+ case -X_LOCK_HALF_DECR:
+ lock->wakeup_wait_ex();
+ }
}
}
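As a reading aid for the switch in rw_lock_x_unlock_func() above, the matched lock_word values can be derived from the encoding in sync0rw.h (X_LOCK_DECR is 0x20000000 there, and X_LOCK_HALF_DECR is half of it): acquiring X subtracts X_LOCK_DECR, acquiring SX subtracts X_LOCK_HALF_DECR, and each S reader subtracts 1 from the unlocked value X_LOCK_DECR. The static_asserts below merely restate the comments in the hunk, not new behaviour.

#include <cstdint>

constexpr int32_t X_LOCK_DECR= 0x20000000;
constexpr int32_t X_LOCK_HALF_DECR= X_LOCK_DECR / 2;
constexpr int32_t UNLOCKED= X_LOCK_DECR;

// case 0: one X lock and nothing else
static_assert(UNLOCKED - X_LOCK_DECR == 0, "one X");
// case -X_LOCK_HALF_DECR: one X lock plus one SX lock (same thread)
static_assert(UNLOCKED - X_LOCK_DECR - X_LOCK_HALF_DECR == -X_LOCK_HALF_DECR,
              "X and SX");
// case -X_LOCK_DECR: two recursive X locks
static_assert(UNLOCKED - 2 * X_LOCK_DECR == -X_LOCK_DECR, "two X");
// case -X_LOCK_DECR - X_LOCK_HALF_DECR: two X locks plus an SX lock
static_assert(UNLOCKED - 2 * X_LOCK_DECR - X_LOCK_HALF_DECR
              == -X_LOCK_DECR - X_LOCK_HALF_DECR, "two X and SX");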
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index 4b6f818000c..d8a78304bd5 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -108,7 +108,7 @@ struct sync_cell_t {
called sync_array_event_wait
on this cell */
int64_t signal_count; /*!< We capture the signal_count
- of the latch when we
+ of the latch.mutex when we
reset the event. This value is
then passed on to os_event_wait
and we wait only if the event
@@ -282,24 +282,6 @@ sync_array_free(
UT_DELETE(arr);
}
-/*******************************************************************//**
-Returns the event that the thread owning the cell waits for. */
-static
-os_event_t
-sync_cell_get_event(
-/*================*/
- sync_cell_t* cell) /*!< in: non-empty sync array cell */
-{
- switch(cell->request_type) {
- case SYNC_MUTEX:
- return(cell->latch.mutex->event());
- case RW_LOCK_X_WAIT:
- return(cell->latch.lock->wait_ex_event);
- default:
- return(cell->latch.lock->event);
- }
-}
-
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
The event of the cell is reset to nonsignalled state.
@@ -365,8 +347,9 @@ sync_array_reserve_cell(
/* Make sure the event is reset and also store the value of
signal_count at which the event was reset. */
- os_event_t event = sync_cell_get_event(cell);
- cell->signal_count = os_event_reset(event);
+ if (cell->request_type == SYNC_MUTEX) {
+ cell->signal_count= os_event_reset(cell->latch.mutex->event());
+ }
return(cell);
}
@@ -453,7 +436,50 @@ sync_array_wait_event(
sync_array_exit(arr);
tpool::tpool_wait_begin();
- os_event_wait_low(sync_cell_get_event(cell), cell->signal_count);
+
+ if (cell->request_type == SYNC_MUTEX) {
+ os_event_wait_low(cell->latch.mutex->event(),
+ cell->signal_count);
+ } else {
+ rw_lock_t *lock= cell->latch.lock;
+ const int32_t lock_word = lock->lock_word;
+ if (cell->request_type == RW_LOCK_X_WAIT) {
+ switch (lock_word) {
+ case 0:
+ case -X_LOCK_DECR:
+ break;
+ default:
+ mysql_mutex_lock(&lock->wait_mutex);
+ while (const int32_t l = lock->lock_word) {
+ if (l == -X_LOCK_DECR) {
+ break;
+ }
+ mysql_cond_wait(&lock->wait_ex_cond,
+ &lock->wait_mutex);
+ }
+ mysql_mutex_unlock(&lock->wait_mutex);
+ }
+ } else if (lock_word <= 0) {
+ mysql_mutex_lock(&lock->wait_mutex);
+ for (;;) {
+ /* Ensure that we will be woken up. */
+ lock->waiters.exchange(
+ 1, std::memory_order_acquire);
+ int32_t l = lock->lock_word.load(
+ std::memory_order_acquire);
+ if (l > 0) {
+ break;
+ } else if (l == 0) {
+ mysql_cond_signal(&lock->wait_ex_cond);
+ }
+
+ mysql_cond_wait(&lock->wait_cond,
+ &lock->wait_mutex);
+ }
+ mysql_mutex_unlock(&lock->wait_mutex);
+ }
+ }
+
tpool::tpool_wait_end();
sync_array_free_cell(arr, cell);
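The waiter side of the new rules in sync_array_wait_event() can be read as the following portable sketch (same caveats as before: std primitives stand in for the mysql_* ones, and the helpers are illustrative). Note the hand-over in wait_as_pending(): a pending waiter that observes lock_word == 0 signals wait_ex_cond, apparently because the already-enqueued exclusive waiter might otherwise miss its wakeup.

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>

constexpr int32_t X_LOCK_DECR= 0x20000000; // as in sync0rw.h

struct rw_lock_waiter_sketch
{
  std::atomic<int32_t> lock_word{X_LOCK_DECR};
  std::atomic<uint32_t> waiters{0};
  std::mutex wait_mutex;
  std::condition_variable wait_cond;
  std::condition_variable wait_ex_cond;

  // RW_LOCK_X_WAIT path: the caller already subtracted X_LOCK_DECR from
  // lock_word, so it sleeps until all readers and SX holders are gone.
  void wait_as_enqueued_x()
  {
    std::unique_lock<std::mutex> g(wait_mutex);
    while (const int32_t l= lock_word.load())
    {
      if (l == -X_LOCK_DECR)
        break; // the recursive-X case that the patch also accepts
      wait_ex_cond.wait(g);
    }
  }

  // Ordinary path: an S, SX or X request that has not been granted yet.
  void wait_as_pending()
  {
    std::unique_lock<std::mutex> g(wait_mutex);
    for (;;)
    {
      waiters.exchange(1, std::memory_order_acquire); // guarantee a wakeup
      const int32_t l= lock_word.load(std::memory_order_acquire);
      if (l > 0)
        break;                     // available again; retry the acquisition
      if (l == 0)
        wait_ex_cond.notify_one(); // hand over to the enqueued X waiter
      wait_cond.wait(g);
    }
  }
};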
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index b86e11900b8..7db75327196 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -111,31 +111,24 @@ waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
memory barrier is required after waiters is set, and before
verifying lock_word is still held, to ensure some unlocker
really does see the flags new value.
-event: Threads wait on event for read or writer lock when another
+wait_cond: Threads wait for read or writer lock when another
thread has an x-lock or an x-lock reservation (wait_ex). A
- thread may only wait on event after performing the following
+ thread may only wait after performing the following
actions in order:
- (1) Record the counter value of event (with os_event_reset).
- (2) Set waiters to 1.
- (3) Verify lock_word <= 0.
- (1) must come before (2) to ensure signal is not missed.
- (2) must come before (3) to ensure a signal is sent.
+ (1) Set waiters to 1 (ensure that wait_cond be signaled).
+ (2) Verify lock_word <= 0.
These restrictions force the above ordering.
Immediately before sending the wake-up signal, we should:
(1) Verify lock_word == X_LOCK_DECR (unlocked)
(2) Reset waiters to 0.
-wait_ex_event: A thread may only wait on the wait_ex_event after it has
+wait_ex_cond: A thread may only wait on the wait_ex_cond after it has
performed the following actions in order:
(1) Decrement lock_word by X_LOCK_DECR.
- (2) Record counter value of wait_ex_event (os_event_reset,
- called from sync_array_reserve_cell).
- (3) Verify that lock_word < 0.
- (1) must come first to ensures no other threads become reader
- or next writer, and notifies unlocker that signal must be sent.
- (2) must come before (3) to ensure the signal is not missed.
+ (2) Verify that lock_word < 0.
These restrictions force the above ordering.
Immediately before sending the wake-up signal, we should:
- Verify lock_word == 0 (waiting thread holds x_lock)
+ Verify lock_word == 0 || lock_word == -X_LOCK_HALF_DECR
+ (waiting thread holds x_lock)
*/
rw_lock_stats_t rw_lock_stats;
@@ -229,8 +222,9 @@ rw_lock_create_func(
lock->count_os_wait = 0;
lock->last_x_file_name = "not yet reserved";
lock->last_x_line = 0;
- lock->event = os_event_create(0);
- lock->wait_ex_event = os_event_create(0);
+ mysql_mutex_init(0, &lock->wait_mutex, nullptr);
+ mysql_cond_init(0, &lock->wait_cond, nullptr);
+ mysql_cond_init(0, &lock->wait_ex_cond, nullptr);
lock->is_block_lock = 0;
@@ -257,13 +251,34 @@ rw_lock_free_func(
mutex_enter(&rw_lock_list_mutex);
- os_event_destroy(lock->event);
-
- os_event_destroy(lock->wait_ex_event);
-
rw_lock_list.remove(*lock);
mutex_exit(&rw_lock_list_mutex);
+
+ mysql_mutex_destroy(&lock->wait_mutex);
+ mysql_cond_destroy(&lock->wait_cond);
+ mysql_cond_destroy(&lock->wait_ex_cond);
+}
+
+/** Wake up pending (not enqueued) waiters */
+void rw_lock_t::wakeup_waiters()
+{
+ mysql_mutex_lock(&wait_mutex);
+ const auto w= waiters.exchange(0);
+ if (w)
+ mysql_cond_broadcast(&wait_cond);
+ mysql_mutex_unlock(&wait_mutex);
+ if (w)
+ sync_array_object_signalled();
+}
+
+/** Wake up the enqueued exclusive lock waiter */
+void rw_lock_t::wakeup_wait_ex()
+{
+ mysql_mutex_lock(&wait_mutex);
+ mysql_cond_signal(&wait_ex_cond);
+ mysql_mutex_unlock(&wait_mutex);
+ sync_array_object_signalled();
}
/******************************************************************//**
@@ -291,23 +306,21 @@ rw_lock_s_lock_spin(
ut_ad(rw_lock_validate(lock));
rw_lock_stats.rw_s_spin_wait_count.inc();
-
lock_loop:
-
/* Spin waiting for the writer field to become free */
HMT_low();
ulint j = i;
- while (i < srv_n_spin_wait_rounds &&
- lock->lock_word <= 0) {
+ for (; i < srv_n_spin_wait_rounds; i++) {
+ if (lock->lock_word > 0) {
+ goto available;
+ }
ut_delay(srv_spin_wait_delay);
- i++;
}
HMT_medium();
- if (i >= srv_n_spin_wait_rounds) {
- os_thread_yield();
- }
-
+ os_thread_yield();
+available:
+ HMT_medium();
spin_count += lint(i - j);
/* We try once again to obtain the lock */
@@ -786,7 +799,6 @@ lock_loop:
return;
} else {
-
/* Spin waiting for the lock_word to become free */
ulint j = i;
while (i < srv_n_spin_wait_rounds