 include/my_atomic_wrapper.h          |  4
 storage/innobase/include/sync0rw.h   | 21
 storage/innobase/include/sync0rw.ic  | 89
 storage/innobase/sync/sync0arr.cc    | 70
 storage/innobase/sync/sync0rw.cc     | 74
 5 files changed, 144 insertions(+), 114 deletions(-)
diff --git a/include/my_atomic_wrapper.h b/include/my_atomic_wrapper.h index c0e18ea7c91..b256ba1069a 100644 --- a/include/my_atomic_wrapper.h +++ b/include/my_atomic_wrapper.h @@ -41,7 +41,9 @@ public: Atomic_relaxed(Type val) : m(val) {} Atomic_relaxed() {} - operator Type() const { return m.load(std::memory_order_relaxed); } + Type load(std::memory_order o= std::memory_order_relaxed) const + { return m.load(o); } + operator Type() const { return load(); } Type operator=(const Type val) { m.store(val, std::memory_order_relaxed); return val; } Type operator=(const Atomic_relaxed<Type> &rhs) { return *this= Type{rhs}; } diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index fce598f0930..090aa2a82fe 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -34,7 +34,6 @@ Created 9/11/1995 Heikki Tuuri #ifndef sync0rw_h #define sync0rw_h -#include "os0event.h" #include "ut0mutex.h" #include "ilist.h" @@ -572,7 +571,7 @@ struct rw_lock_t : /** Holds the state of the lock. */ Atomic_relaxed<int32_t> lock_word; - /** 0=no waiters, 1=waiters for X or SX lock exist */ + /** 0=no waiters, 1=waiters exist */ Atomic_relaxed<uint32_t> waiters; /** number of granted SX locks. */ @@ -586,12 +585,14 @@ struct rw_lock_t : the lock_word. */ volatile os_thread_id_t writer_thread; - /** Used by sync0arr.cc for thread queueing */ - os_event_t event; - - /** Event for next-writer to wait on. A thread must decrement - lock_word before waiting. */ - os_event_t wait_ex_event; + /** Mutex to wait on conflict */ + mysql_mutex_t wait_mutex; + /** Condition variable for an ordinary lock request (S, SX, X) */ + mysql_cond_t wait_cond; + /** Condition variable for a successfully enqueued wait for an + exclusive lock. Subsequent requests must wait until it has been + granted and released. */ + mysql_cond_t wait_ex_cond; /** File name where lock created */ const char* cfile_name; @@ -625,6 +626,10 @@ struct rw_lock_t : /** Level in the global latching order. */ latch_level_t level; #endif /* UNIV_DEBUG */ + /** Wake up pending (not enqueued) waiters */ + void wakeup_waiters(); + /** Wake up the enqueued exclusive lock waiter */ + void wakeup_wait_ex(); }; #ifdef UNIV_DEBUG /** The structure for storing debug info of an rw-lock. All access to this diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic index 169cbdd9aa5..b2dc7ab6454 100644 --- a/storage/innobase/include/sync0rw.ic +++ b/storage/innobase/include/sync0rw.ic @@ -31,8 +31,6 @@ The read-write lock (for threads) Created 9/11/1995 Heikki Tuuri *******************************************************/ -#include "os0event.h" - /******************************************************************//** Lock an rw-lock in shared mode for the current thread. If the rw-lock is locked in exclusive mode, or there is an exclusive lock request waiting, @@ -371,8 +369,7 @@ rw_lock_s_unlock_func( /* wait_ex waiter exists. It may not be asleep, but we signal anyway. We do not wake other waiters, because they can't exist without wait_ex waiter and wait_ex waiter goes first.*/ - os_event_set(lock->wait_ex_event); - sync_array_object_signalled(); + lock->wakeup_wait_ex(); } else { ut_ad(lock_word > -X_LOCK_DECR); ut_ad(lock_word < X_LOCK_DECR); @@ -393,44 +390,34 @@ rw_lock_x_unlock_func( #endif /* UNIV_DEBUG */ rw_lock_t* lock) /*!< in/out: rw-lock */ { - int32_t lock_word = lock->lock_word; - - if (lock_word == 0) { - /* Last caller in a possible recursive chain. 
*/ - lock->writer_thread = 0; - } - - ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X)); - - if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) { - /* Last X-lock owned by this thread, it may still hold SX-locks. - ACQ_REL due to... - RELEASE: we release rw-lock - ACQUIRE: we want waiters to be loaded after lock_word is stored */ - lock->lock_word.fetch_add(X_LOCK_DECR, - std::memory_order_acq_rel); - - /* This no longer has an X-lock but it may still have - an SX-lock. So it is now free for S-locks by other threads. - We need to signal read/write waiters. - We do not need to signal wait_ex waiters, since they cannot - exist when there is a writer. */ - if (lock->waiters) { - lock->waiters = 0; - os_event_set(lock->event); - sync_array_object_signalled(); - } - } else if (lock_word == -X_LOCK_DECR - || lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) { - /* There are 2 x-locks */ - lock->lock_word.fetch_add(X_LOCK_DECR); - } else { - /* There are more than 2 x-locks. */ - ut_ad(lock_word < -X_LOCK_DECR); - lock->lock_word.fetch_add(1); - } - - ut_ad(rw_lock_validate(lock)); + ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X)); + + switch (int32_t lock_word= lock->lock_word) { + case 0: + /* Last caller in a possible recursive chain. */ + lock->writer_thread= 0; + /* fall through */ + case -X_LOCK_HALF_DECR: + /* Last X-lock owned by this thread, it may still hold SX-locks. + ACQ_REL due to... + RELEASE: we release rw-lock + ACQUIRE: we want waiters to be loaded after lock_word is stored */ + lock->lock_word.fetch_add(X_LOCK_DECR, std::memory_order_acq_rel); + if (lock->waiters) + lock->wakeup_waiters(); + break; + case -X_LOCK_DECR: + case -X_LOCK_DECR - X_LOCK_HALF_DECR: + /* There are 2 x-locks */ + lock->lock_word.fetch_add(X_LOCK_DECR); + break; + default: + /* There are more than 2 x-locks. */ + ut_ad(lock_word < -X_LOCK_DECR); + lock->lock_word.fetch_add(1); + } + + ut_ad(rw_lock_validate(lock)); } /******************************************************************//** @@ -447,12 +434,9 @@ rw_lock_sx_unlock_func( { ut_ad(rw_lock_get_sx_lock_count(lock)); ut_ad(lock->sx_recursive > 0); - - --lock->sx_recursive; - ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_SX)); - if (lock->sx_recursive == 0) { + if (--lock->sx_recursive == 0) { int32_t lock_word = lock->lock_word; /* Last caller in a possible recursive chain. */ if (lock_word > 0) { @@ -466,20 +450,21 @@ rw_lock_sx_unlock_func( lock->lock_word.fetch_add(X_LOCK_HALF_DECR, std::memory_order_acq_rel); - /* Lock is now free. May have to signal read/write - waiters. We do not need to signal wait_ex waiters, + /* We do not need to signal wait_ex waiters, since they cannot exist when there is an sx-lock holder. 
*/ if (lock->waiters) { - lock->waiters = 0; - os_event_set(lock->event); - sync_array_object_signalled(); + lock->wakeup_waiters(); } } else { /* still has x-lock */ ut_ad(lock_word == -X_LOCK_HALF_DECR || lock_word <= -(X_LOCK_DECR + X_LOCK_HALF_DECR)); - lock->lock_word.fetch_add(X_LOCK_HALF_DECR); + switch (lock->lock_word.fetch_add(X_LOCK_HALF_DECR)) { + case -X_LOCK_HALF_DECR * 2: + case -X_LOCK_HALF_DECR: + lock->wakeup_wait_ex(); + } } } diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc index 4b6f818000c..d8a78304bd5 100644 --- a/storage/innobase/sync/sync0arr.cc +++ b/storage/innobase/sync/sync0arr.cc @@ -108,7 +108,7 @@ struct sync_cell_t { called sync_array_event_wait on this cell */ int64_t signal_count; /*!< We capture the signal_count - of the latch when we + of the latch.mutex when we reset the event. This value is then passed on to os_event_wait and we wait only if the event @@ -282,24 +282,6 @@ sync_array_free( UT_DELETE(arr); } -/*******************************************************************//** -Returns the event that the thread owning the cell waits for. */ -static -os_event_t -sync_cell_get_event( -/*================*/ - sync_cell_t* cell) /*!< in: non-empty sync array cell */ -{ - switch(cell->request_type) { - case SYNC_MUTEX: - return(cell->latch.mutex->event()); - case RW_LOCK_X_WAIT: - return(cell->latch.lock->wait_ex_event); - default: - return(cell->latch.lock->event); - } -} - /******************************************************************//** Reserves a wait array cell for waiting for an object. The event of the cell is reset to nonsignalled state. @@ -365,8 +347,9 @@ sync_array_reserve_cell( /* Make sure the event is reset and also store the value of signal_count at which the event was reset. */ - os_event_t event = sync_cell_get_event(cell); - cell->signal_count = os_event_reset(event); + if (cell->request_type == SYNC_MUTEX) { + cell->signal_count= os_event_reset(cell->latch.mutex->event()); + } return(cell); } @@ -453,7 +436,50 @@ sync_array_wait_event( sync_array_exit(arr); tpool::tpool_wait_begin(); - os_event_wait_low(sync_cell_get_event(cell), cell->signal_count); + + if (cell->request_type == SYNC_MUTEX) { + os_event_wait_low(cell->latch.mutex->event(), + cell->signal_count); + } else { + rw_lock_t *lock= cell->latch.lock; + const int32_t lock_word = lock->lock_word; + if (cell->request_type == RW_LOCK_X_WAIT) { + switch (lock_word) { + case 0: + case -X_LOCK_DECR: + break; + default: + mysql_mutex_lock(&lock->wait_mutex); + while (const int32_t l = lock->lock_word) { + if (l == -X_LOCK_DECR) { + break; + } + mysql_cond_wait(&lock->wait_ex_cond, + &lock->wait_mutex); + } + mysql_mutex_unlock(&lock->wait_mutex); + } + } else if (lock_word <= 0) { + mysql_mutex_lock(&lock->wait_mutex); + for (;;) { + /* Ensure that we will be woken up. 
*/ + lock->waiters.exchange( + 1, std::memory_order_acquire); + int32_t l = lock->lock_word.load( + std::memory_order_acquire); + if (l > 0) { + break; + } else if (l == 0) { + mysql_cond_signal(&lock->wait_ex_cond); + } + + mysql_cond_wait(&lock->wait_cond, + &lock->wait_mutex); + } + mysql_mutex_unlock(&lock->wait_mutex); + } + } + tpool::tpool_wait_end(); sync_array_free_cell(arr, cell); diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc index b86e11900b8..7db75327196 100644 --- a/storage/innobase/sync/sync0rw.cc +++ b/storage/innobase/sync/sync0rw.cc @@ -111,31 +111,24 @@ waiters: May be set to 1 anytime, but to avoid unnecessary wake-up memory barrier is required after waiters is set, and before verifying lock_word is still held, to ensure some unlocker really does see the flags new value. -event: Threads wait on event for read or writer lock when another +wait_cond: Threads wait for read or writer lock when another thread has an x-lock or an x-lock reservation (wait_ex). A - thread may only wait on event after performing the following + thread may only wait after performing the following actions in order: - (1) Record the counter value of event (with os_event_reset). - (2) Set waiters to 1. - (3) Verify lock_word <= 0. - (1) must come before (2) to ensure signal is not missed. - (2) must come before (3) to ensure a signal is sent. + (1) Set waiters to 1 (ensure that wait_cond be signaled). + (2) Verify lock_word <= 0. These restrictions force the above ordering. Immediately before sending the wake-up signal, we should: (1) Verify lock_word == X_LOCK_DECR (unlocked) (2) Reset waiters to 0. -wait_ex_event: A thread may only wait on the wait_ex_event after it has +wait_ex_cond: A thread may only wait on the wait_ex_cond after it has performed the following actions in order: (1) Decrement lock_word by X_LOCK_DECR. - (2) Record counter value of wait_ex_event (os_event_reset, - called from sync_array_reserve_cell). - (3) Verify that lock_word < 0. - (1) must come first to ensures no other threads become reader - or next writer, and notifies unlocker that signal must be sent. - (2) must come before (3) to ensure the signal is not missed. + (2) Verify that lock_word < 0. These restrictions force the above ordering. 
Immediately before sending the wake-up signal, we should: - Verify lock_word == 0 (waiting thread holds x_lock) + Verify lock_word == 0 || lock_word == -X_LOCK_HALF_DECR + (waiting thread holds x_lock) */ rw_lock_stats_t rw_lock_stats; @@ -229,8 +222,9 @@ rw_lock_create_func( lock->count_os_wait = 0; lock->last_x_file_name = "not yet reserved"; lock->last_x_line = 0; - lock->event = os_event_create(0); - lock->wait_ex_event = os_event_create(0); + mysql_mutex_init(0, &lock->wait_mutex, nullptr); + mysql_cond_init(0, &lock->wait_cond, nullptr); + mysql_cond_init(0, &lock->wait_ex_cond, nullptr); lock->is_block_lock = 0; @@ -257,13 +251,34 @@ rw_lock_free_func( mutex_enter(&rw_lock_list_mutex); - os_event_destroy(lock->event); - - os_event_destroy(lock->wait_ex_event); - rw_lock_list.remove(*lock); mutex_exit(&rw_lock_list_mutex); + + mysql_mutex_destroy(&lock->wait_mutex); + mysql_cond_destroy(&lock->wait_cond); + mysql_cond_destroy(&lock->wait_ex_cond); +} + +/** Wake up pending (not enqueued) waiters */ +void rw_lock_t::wakeup_waiters() +{ + mysql_mutex_lock(&wait_mutex); + const auto w= waiters.exchange(0); + if (w) + mysql_cond_broadcast(&wait_cond); + mysql_mutex_unlock(&wait_mutex); + if (w) + sync_array_object_signalled(); +} + +/** Wake up the enqueued exclusive lock waiter */ +void rw_lock_t::wakeup_wait_ex() +{ + mysql_mutex_lock(&wait_mutex); + mysql_cond_signal(&wait_ex_cond); + mysql_mutex_unlock(&wait_mutex); + sync_array_object_signalled(); } /******************************************************************//** @@ -291,23 +306,21 @@ rw_lock_s_lock_spin( ut_ad(rw_lock_validate(lock)); rw_lock_stats.rw_s_spin_wait_count.inc(); - lock_loop: - /* Spin waiting for the writer field to become free */ HMT_low(); ulint j = i; - while (i < srv_n_spin_wait_rounds && - lock->lock_word <= 0) { + for (; i < srv_n_spin_wait_rounds; i++) { + if (lock->lock_word > 0) { + goto available; + } ut_delay(srv_spin_wait_delay); - i++; } HMT_medium(); - if (i >= srv_n_spin_wait_rounds) { - os_thread_yield(); - } - + os_thread_yield(); +available: + HMT_medium(); spin_count += lint(i - j); /* We try once again to obtain the lock */ @@ -786,7 +799,6 @@ lock_loop: return; } else { - /* Spin waiting for the lock_word to become free */ ulint j = i; while (i < srv_n_spin_wait_rounds |
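
The first hunk extends Atomic_relaxed with an explicit load() so that callers can ask for a stronger memory order than the relaxed default; the new wait code in sync0arr.cc uses this for its acquire loads of lock_word. A minimal self-contained sketch of the wrapper after this change (the real class in include/my_atomic_wrapper.h carries further members, such as fetch_add and exchange):

#include <atomic>
#include <cstdint>

template <typename Type>
class Atomic_relaxed
{
  std::atomic<Type> m;
public:
  Atomic_relaxed(const Atomic_relaxed<Type> &rhs)
  { m.store(rhs, std::memory_order_relaxed); }
  Atomic_relaxed(Type val) : m(val) {}
  Atomic_relaxed() {}

  /* New: a load with a caller-chosen memory order. */
  Type load(std::memory_order o= std::memory_order_relaxed) const
  { return m.load(o); }
  /* The implicit conversion keeps its relaxed semantics. */
  operator Type() const { return load(); }
  Type operator=(const Type val)
  { m.store(val, std::memory_order_relaxed); return val; }
};

/* Usage: relaxed by default, acquire on demand. */
Atomic_relaxed<int32_t> word{42};
int32_t relaxed= word;                                  /* relaxed load */
int32_t acquired= word.load(std::memory_order_acquire); /* acquire load */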
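
The switch statements in rw_lock_x_unlock_func and rw_lock_sx_unlock_func are driven by the lock_word encoding: an unlocked latch holds X_LOCK_DECR, each S lock subtracts 1, an SX lock subtracts X_LOCK_HALF_DECR, and an X lock subtracts X_LOCK_DECR. The sketch below decodes the states that the unlock paths test for; the constant values are assumed from the sync0rw.h of this era (X_LOCK_DECR = 0x20000000) and may differ in other versions.

#include <cstdint>
#include <cstdio>

constexpr int32_t X_LOCK_DECR= 0x20000000;
constexpr int32_t X_LOCK_HALF_DECR= X_LOCK_DECR / 2;

/* Classify a lock_word value into the states the unlock paths test for. */
static const char *describe(int32_t lock_word)
{
  if (lock_word == X_LOCK_DECR) return "unlocked";
  if (lock_word > X_LOCK_HALF_DECR) return "S-locked only";
  if (lock_word == X_LOCK_HALF_DECR) return "SX-locked, no readers";
  if (lock_word > 0) return "SX-locked plus readers";
  if (lock_word == 0) return "X-locked once";
  if (lock_word == -X_LOCK_HALF_DECR)
    return "X+SX by one thread, or SX plus an enqueued X waiter";
  if (lock_word > -X_LOCK_DECR)
    return "readers draining for an enqueued X waiter";
  return "recursively X-locked";
}

int main()
{
  const int32_t samples[]= {X_LOCK_DECR, X_LOCK_DECR - 3, X_LOCK_HALF_DECR,
                            0, -X_LOCK_HALF_DECR, -X_LOCK_DECR};
  for (int32_t w : samples)
    std::printf("%11d: %s\n", w, describe(w));
  return 0;
}

The ambiguity at -X_LOCK_HALF_DECR is why the reworked rw_lock_sx_unlock_func inspects the value returned by fetch_add and, for the two prior values that can hide an enqueued exclusive waiter, calls wakeup_wait_ex(); missing that signal would leave the wait_ex thread sleeping forever.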
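
The heart of the change is the wait protocol itself: instead of the sync-array os_event with its reset/signal counters, a waiter publishes waiters = 1 and only then re-checks lock_word, while an unlocker updates lock_word first and then clears waiters under wait_mutex before broadcasting wait_cond. Below is a stand-alone sketch of that lost-wakeup-free handshake using standard C++ primitives; the names mirror the patch, but the patch itself uses mysql_mutex_t/mysql_cond_t, and its waiter loop additionally signals wait_ex_cond when it observes lock_word == 0 so that the enqueued exclusive waiter is granted the latch.

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>

struct rw_wait_sketch
{
  std::atomic<int32_t> lock_word{1};  /* > 0: the latch is available */
  std::atomic<uint32_t> waiters{0};   /* 1: somebody may be sleeping */
  std::mutex wait_mutex;
  std::condition_variable wait_cond;

  void wait_until_available()
  {
    std::unique_lock<std::mutex> g(wait_mutex);
    for (;;)
    {
      /* (1) Announce the waiter first, so that an unlocker which
      clears the flag afterwards is guaranteed to broadcast. */
      waiters.exchange(1, std::memory_order_acquire);
      /* (2) Re-check the lock word only after the flag is set;
      this ordering closes the missed-wakeup window. */
      if (lock_word.load(std::memory_order_acquire) > 0)
        return;
      wait_cond.wait(g);
    }
  }

  void release()
  {
    /* Make the latch available before looking at the flag. */
    lock_word.store(1, std::memory_order_release);
    std::lock_guard<std::mutex> g(wait_mutex);
    /* Clear the flag and wake everyone only if a waiter announced
    itself; cf. rw_lock_t::wakeup_waiters() above. */
    if (waiters.exchange(0))
      wait_cond.notify_all();
  }
};

Because both sides serialize on wait_mutex around the flag, an unlocker that reads waiters == 0 may safely skip the broadcast: any waiter that sets the flag later must, by the mutex ordering, also observe the already-updated lock_word and return without sleeping.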