summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorVladislav Vaintroub <wlad@mariadb.com>2022-02-17 10:19:20 +0100
committerVladislav Vaintroub <wlad@mariadb.com>2022-02-17 10:24:14 +0100
commit8bc5bf2cb6a235e55a8fd9aaaff5d2b18296d37b (patch)
treeb1e740c744ced2f5577f022cf6817bb7ced8cb4e /storage
parent497809d26df4ebef079a2dfa6357b1fdd5aba9bd (diff)
downloadmariadb-git-8bc5bf2cb6a235e55a8fd9aaaff5d2b18296d37b.tar.gz
MDEV-26789 Fix stall of group commit waiters
Fixed a condition where designated group commit lead was woken in release, but returned early without trying to take over the lock. So, the group commit locks would be unowned, and the waiters could be stalled, until another thread comes that would on whatever reasons flush the redo log. Also, use better criteria for choosing potential next group commit lead.
Diffstat (limited to 'storage')
-rw-r--r--storage/innobase/log/log0log.cc12
-rw-r--r--storage/innobase/log/log0sync.cc27
2 files changed, 29 insertions, 10 deletions
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index bedb52436e2..09f99a5cf02 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -822,9 +822,12 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key,
repeat:
lsn_t ret_lsn1= 0, ret_lsn2= 0;
- if (flush_to_disk &&
- flush_lock.acquire(lsn, callback) != group_commit_lock::ACQUIRED)
- return;
+ if (flush_to_disk)
+ {
+ if (flush_lock.acquire(lsn, callback) != group_commit_lock::ACQUIRED)
+ return;
+ flush_lock.set_pending(log_sys.get_lsn());
+ }
if (write_lock.acquire(lsn, flush_to_disk ? nullptr : callback) ==
group_commit_lock::ACQUIRED)
@@ -832,7 +835,8 @@ repeat:
mysql_mutex_lock(&log_sys.mutex);
lsn_t write_lsn= log_sys.get_lsn();
write_lock.set_pending(write_lsn);
-
+ if (flush_to_disk)
+ flush_lock.set_pending(write_lsn);
log_write(rotate_key);
ut_a(log_sys.write_lsn == write_lsn);
diff --git a/storage/innobase/log/log0sync.cc b/storage/innobase/log/log0sync.cc
index 65cc1d0633c..6b14d1d3591 100644
--- a/storage/innobase/log/log0sync.cc
+++ b/storage/innobase/log/log0sync.cc
@@ -222,7 +222,7 @@ group_commit_lock::lock_return_code group_commit_lock::acquire(value_type num, c
thread_local_waiter.m_value = num;
thread_local_waiter.m_group_commit_leader= false;
std::unique_lock<std::mutex> lk(m_mtx, std::defer_lock);
- while (num > value())
+ while (num > value() || thread_local_waiter.m_group_commit_leader)
{
lk.lock();
@@ -232,11 +232,9 @@ group_commit_lock::lock_return_code group_commit_lock::acquire(value_type num, c
{
lk.unlock();
do_completion_callback(callback);
- thread_local_waiter.m_group_commit_leader=false;
return lock_return_code::EXPIRED;
}
- thread_local_waiter.m_group_commit_leader= false;
if (!m_lock)
{
/* Take the lock, become group commit leader.*/
@@ -249,17 +247,23 @@ group_commit_lock::lock_return_code group_commit_lock::acquire(value_type num, c
return lock_return_code::ACQUIRED;
}
- if (callback && m_waiters_list)
+ if (callback && (m_waiters_list || num <= pending()))
{
/*
- We need to have at least one waiter,
- so it can become the new group commit leader.
+ If num > pending(), we have a good candidate for the next group
+ commit lead, that will be taking over the lock after current owner
+ releases it. We put current thread into waiter's list so it sleeps
+ and can be signaled and marked as group commit lead during lock release.
+
+ For this to work well, pending() must deliver a good approximation for N
+ in the next call to group_commit_lock::release(N).
*/
m_pending_callbacks.push_back({num, *callback});
return lock_return_code::CALLBACK_QUEUED;
}
/* Add yourself to waiters list.*/
+ thread_local_waiter.m_group_commit_leader= false;
thread_local_waiter.m_next = m_waiters_list;
m_waiters_list = &thread_local_waiter;
lk.unlock();
@@ -369,6 +373,17 @@ group_commit_lock::value_type group_commit_lock::release(value_type num)
lk.unlock();
+ /*
+ Release designated next group commit lead first,
+ to minimize spurious wakeups.
+ */
+ if (wakeup_list && wakeup_list->m_group_commit_leader)
+ {
+ next = wakeup_list->m_next;
+ wakeup_list->m_sema.wake();
+ wakeup_list= next;
+ }
+
for (size_t i = 0; i < callback_count; i++)
callbacks[i].m_callback(callbacks[i].m_param);