author     Marko Mäkelä <marko.makela@mariadb.com>  2020-06-11 17:30:33 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>  2020-06-11 17:30:33 +0300
commit     757e756d6eeddf124c11a7a82a649bc6425ca355 (patch)
tree       926e1fd6c90183238c5622321485ef0780856423
parent     0af1b0bd213db385fa9073e0936d59f73bceddf6 (diff)
MDEV-22850 Reduce buf_pool.page_hash latch contention
For reads, the buf_pool.page_hash is protected by buf_pool.mutex or by the
hash_lock. There is no need to compute or acquire hash_lock if we are not
modifying the buf_pool.page_hash. However, the buf_pool.page_hash latch must
be held exclusively when changing buf_page_t::in_file(), or if we desire to
prevent buf_page_t::can_relocate() or buf_page_t::buf_fix_count() from
changing.

rw_lock_lock_word_decr(): Add a comment that explains the polling logic.

buf_page_t::set_state(): When in_file() is to be changed, assert that an
exclusive buf_pool.page_hash latch is being held. Unfortunately we cannot
assert this for set_state(BUF_BLOCK_REMOVE_HASH) because set_corrupt_id()
may already have been called.

buf_LRU_free_page(): Check buf_page_t::can_relocate() before acquiring the
hash_lock.

buf_block_t::initialise(): Initialize also page.buf_fix_count().

buf_page_create(): Initialize buf_fix_count while not holding any mutex or
hash_lock. Acquire the hash_lock only for the duration of inserting the
block into the buf_pool.page_hash.

buf_LRU_old_init(), buf_LRU_add_block(), buf_page_t::belongs_to_unzip_LRU():
Do not assert buf_page_t::in_file(), because buf_page_create() will invoke
buf_LRU_add_block() before acquiring the hash_lock and calling
buf_page_t::set_state().

buf_pool_t::validate(): Rely on the buf_pool.mutex and do not unnecessarily
acquire any buf_pool.page_hash latches.

buf_page_init_for_read(): Clarify that we must acquire the hash_lock upfront
in order to prevent a race with buf_pool_t::watch_remove().
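
As a toy illustration of the latching rule described above, and not InnoDB's
actual API (std::shared_mutex stands in for rw_lock_t, and toy_pool with its
members is hypothetical): readers of the page hash may rely on either the
pool mutex or a shared latch, while any modification requires the exclusive
latch.

#include <cstdint>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>

struct toy_pool
{
  std::mutex mutex;                  /* plays the role of buf_pool.mutex */
  std::shared_mutex page_hash_latch; /* plays the role of one hash_lock */
  std::unordered_map<uint64_t, void*> page_hash;

  /* A pure lookup needs no exclusive latch: a shared latch (or, in
  InnoDB, alternatively holding buf_pool.mutex) suffices. */
  void *lookup(uint64_t id)
  {
    std::shared_lock<std::shared_mutex> s(page_hash_latch);
    auto i= page_hash.find(id);
    return i == page_hash.end() ? nullptr : i->second;
  }

  /* Inserting into the hash (compare HASH_INSERT in the patch), or
  changing state that in_file() depends on, takes the exclusive latch. */
  void insert(uint64_t id, void *block)
  {
    std::unique_lock<std::shared_mutex> x(page_hash_latch);
    page_hash.emplace(id, block);
  }
};
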
-rw-r--r--  storage/innobase/buf/buf0buf.cc      | 28
-rw-r--r--  storage/innobase/buf/buf0lru.cc      | 12
-rw-r--r--  storage/innobase/buf/buf0rea.cc      |  8
-rw-r--r--  storage/innobase/include/buf0buf.h   | 42
-rw-r--r--  storage/innobase/include/sync0rw.ic  | 23
5 files changed, 88 insertions(+), 25 deletions(-)
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index e17460a8cee..47b4eac0ed2 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -3776,15 +3776,16 @@ buf_page_try_get_func(
}
/** Initialize the block.
-@param page_id page id
-@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
-void buf_block_t::initialise(const page_id_t page_id, ulint zip_size)
+@param page_id page identifier
+@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@param fix initial buf_fix_count() */
+void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
+ uint32_t fix)
{
ut_ad(page.state() != BUF_BLOCK_FILE_PAGE);
buf_block_init_low(this);
lock_hash_val= lock_rec_hash(page_id.space(), page_id.page_no());
- page.init();
- page.id_= page_id;
+ page.init(page_id, fix);
page_zip_set_size(&page.zip, zip_size);
}
@@ -3803,11 +3804,9 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
ut_ad(page_id.space() != 0 || !zip_size);
buf_block_t *free_block= buf_LRU_get_free_block(false);
- free_block->initialise(page_id, zip_size);
+ free_block->initialise(page_id, zip_size, 1);
- rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
mutex_enter(&buf_pool.mutex);
- rw_lock_x_lock(hash_lock);
buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id));
@@ -3816,7 +3815,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
!buf_pool.watch_is_sentinel(block->page))
{
/* Page can be found in buf_pool */
- rw_lock_x_unlock(hash_lock);
buf_LRU_block_free_non_file_page(free_block);
mutex_exit(&buf_pool.mutex);
@@ -3846,11 +3844,17 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
page_id.space(), page_id.page_no()));
block= free_block;
- buf_block_buf_fix_inc(block, __FILE__, __LINE__);
+
+ /* Duplicate buf_block_buf_fix_inc_func() */
+ ut_ad(block->page.buf_fix_count() == 1);
+ ut_ad(fsp_is_system_temporary(page_id.space()) ||
+ rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));
/* The block must be put to the LRU list */
- block->page.set_state(BUF_BLOCK_FILE_PAGE);
buf_LRU_add_block(&block->page, false);
+ rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
+ rw_lock_x_lock(hash_lock);
+ block->page.set_state(BUF_BLOCK_FILE_PAGE);
ut_d(block->page.in_page_hash= true);
HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(),
&block->page);
@@ -4401,7 +4405,6 @@ void buf_pool_t::validate()
ulint n_zip = 0;
mutex_enter(&mutex);
- page_hash_lock_all();
chunk_t* chunk = chunks;
@@ -4492,7 +4495,6 @@ void buf_pool_t::validate()
ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing);
- page_hash_unlock_all();
mutex_exit(&flush_list_mutex);
if (curr_size == old_size
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 293aba71320..885397f2f6c 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -940,7 +940,6 @@ static void buf_LRU_old_init()
bpage = UT_LIST_GET_PREV(LRU, bpage)) {
ut_ad(bpage->in_LRU_list);
- ut_ad(bpage->in_file());
/* This loop temporarily violates the
assertions of buf_page_t::set_old(). */
@@ -1066,7 +1065,6 @@ buf_LRU_add_block(
the start, regardless of this parameter */
{
ut_ad(mutex_own(&buf_pool.mutex));
- ut_a(bpage->in_file());
ut_ad(!bpage->in_LRU_list);
if (!old || (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN)) {
@@ -1153,10 +1151,18 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
ut_ad(bpage->in_file());
ut_ad(bpage->in_LRU_list);
+ /* First, perform a quick check before we acquire hash_lock. */
+ if (!bpage->can_relocate()) {
+ return false;
+ }
+
+ /* We must hold an exclusive hash_lock to prevent
+ bpage->can_relocate() from changing due to a concurrent
+ execution of buf_page_get_low(). */
rw_lock_t* hash_lock = buf_pool.hash_lock_get(id);
rw_lock_x_lock(hash_lock);
- if (!bpage->can_relocate()) {
+ if (UNIV_UNLIKELY(!bpage->can_relocate())) {
/* Do not free buffer fixed and I/O-fixed blocks. */
goto func_exit;
}
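
The buf_LRU_free_page() change above is classic double-checked locking: a
cheap test without the latch filters out pages that cannot be relocated, and
the test is repeated under the exclusive hash_lock because only the latched
recheck is authoritative. A minimal sketch of the pattern, with freeable()
and latch as hypothetical stand-ins for bpage->can_relocate() and the
hash_lock:

#include <shared_mutex>

extern bool freeable();         /* stand-in for bpage->can_relocate() */
extern std::shared_mutex latch; /* stand-in for the page_hash latch */

bool try_free()
{
  /* Unlatched quick check: a stale "true" is harmless, because the
  recheck below catches it; a stale "false" merely skips one page. */
  if (!freeable())
    return false;

  std::unique_lock<std::shared_mutex> x(latch);
  /* Under the exclusive latch the answer can no longer change. */
  if (!freeable())
    return false;

  /* ... proceed to free or relocate the page ... */
  return true;
}
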
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index e0322dee2da..9c3b347a236 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -121,6 +121,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
mutex_enter(&buf_pool.mutex);
+ /* We must acquire hash_lock this early to prevent
+ a race condition with buf_pool_t::watch_remove() */
rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
rw_lock_x_lock(hash_lock);
@@ -151,8 +153,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
buf_pool.watch_remove(hash_page);
}
- block->page.set_state(BUF_BLOCK_FILE_PAGE);
block->page.set_io_fix(BUF_IO_READ);
+ block->page.set_state(BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.in_page_hash);
ut_d(block->page.in_page_hash= true);
HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(), bpage);
@@ -202,8 +204,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
{
/* The block was added by some other thread. */
rw_lock_x_unlock(hash_lock);
- buf_buddy_free(data, zip_size);
- goto func_exit;
+ buf_buddy_free(data, zip_size);
+ goto func_exit;
}
}
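
The comment added to buf_page_init_for_read() describes the opposite
resolution of a check-then-act race: instead of rechecking later, the
exclusive latch is taken before the lookup, so that finding a watch sentinel
and replacing it via buf_pool_t::watch_remove() form one atomic section. A
hedged sketch of that shape (latch, table, and the erase standing in for
watch_remove() are hypothetical, not InnoDB's types):

#include <cstdint>
#include <mutex>
#include <unordered_map>

extern std::mutex latch;                        /* stand-in for hash_lock */
extern std::unordered_map<uint64_t, int> table; /* stand-in for page_hash */

void replace_sentinel(uint64_t id, int real_entry)
{
  /* Latch first: no other thread can touch the sentinel between
  our lookup and our removal of it. */
  std::lock_guard<std::mutex> g(latch);
  auto i= table.find(id);
  if (i != table.end())
    table.erase(i);            /* analogous to watch_remove() */
  table.emplace(id, real_entry);
}
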
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 522441259dd..825eb7631fe 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -995,6 +995,14 @@ public:
buf_fix_count_= buf_fix_count;
}
+ /** Initialize some more fields */
+ void init(page_id_t id, uint32_t buf_fix_count= 0)
+ {
+ init();
+ id_= id;
+ buf_fix_count_= buf_fix_count;
+ }
+
public:
const page_id_t &id() const { return id_; }
buf_page_state state() const { return state_; }
@@ -1010,8 +1018,7 @@ public:
/** @return if this belongs to buf_pool.unzip_LRU */
bool belongs_to_unzip_LRU() const
{
- ut_ad(in_file());
- return zip.data && state() == BUF_BLOCK_FILE_PAGE;
+ return zip.data && state() != BUF_BLOCK_ZIP_PAGE;
}
inline void add_buf_fix_count(uint32_t count);
@@ -1277,9 +1284,10 @@ struct buf_block_t{
ulint zip_size() const { return page.zip_size(); }
/** Initialize the block.
- @param page_id page id
- @param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
- void initialise(const page_id_t page_id, ulint zip_size);
+ @param page_id page identifier
+ @param zip_size ROW_FORMAT=COMPRESSED page size, or 0
+ @param fix initial buf_fix_count() */
+ void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0);
};
/**********************************************************************//**
@@ -2134,6 +2142,29 @@ inline void buf_page_t::set_buf_fix_count(uint32_t count)
inline void buf_page_t::set_state(buf_page_state state)
{
ut_ad(mutex_own(&buf_pool.mutex));
+#ifdef UNIV_DEBUG
+ switch (state) {
+ case BUF_BLOCK_REMOVE_HASH:
+ /* buf_pool_t::corrupted_evict() invokes set_corrupt_id()
+ before buf_LRU_free_one_page(), so we cannot assert that
+ we are holding the hash_lock. */
+ break;
+ case BUF_BLOCK_MEMORY:
+ if (!in_file()) break;
+ /* fall through */
+ case BUF_BLOCK_FILE_PAGE:
+ ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
+ break;
+ case BUF_BLOCK_NOT_USED:
+ if (!in_file()) break;
+ /* fall through */
+ case BUF_BLOCK_ZIP_PAGE:
+ ut_ad((this >= &buf_pool.watch[0] &&
+ this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) ||
+ rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
+ break;
+ }
+#endif
state_= state;
}
@@ -2212,7 +2243,6 @@ inline bool buf_page_t::is_old() const
/** Set whether a block is old in buf_pool.LRU */
inline void buf_page_t::set_old(bool old)
{
- ut_ad(in_file());
ut_ad(mutex_own(&buf_pool.mutex));
ut_ad(in_LRU_list);
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 18a6c69ece4..7fcac01e5ba 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -220,8 +220,31 @@ rw_lock_lock_word_decr(
return(true);
}
+
+ /* Note that lock_copy was reloaded above. We will
+ keep trying if a spurious conflict occurred, typically
+ caused by concurrent executions of
+ rw_lock_s_lock(). */
+
+#if 1 /* FIXME: MDEV-22871 Spurious contention between rw_lock_s_lock() */
+
+ /* When the number of concurrently executing threads
+ exceeds the number of available processor cores,
+ multiple buf_pool.page_hash S-latch requests would
+ conflict here, mostly in buf_page_get_low(). We should
+ implement a simpler rw-lock where the S-latch
+ acquisition would be a simple fetch_add(1) followed by
+ either an optional load() loop to wait for the X-latch
+ to be released, or a fetch_sub(1) and a retry.
+
+ For now, we work around the problem with a delay in
+ this loop. It helped a little on some systems and was
+ reducing performance on others. */
(void) LF_BACKOFF();
+#endif
}
+
+ /* A real conflict was detected. */
return(false);
}
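
A minimal sketch of the simpler rw-lock proposed in the FIXME above, under
the assumption that the X-latch is encoded as a large negative bias in a
single atomic word (simple_rw_lock and X_BIAS are illustrative names, not
InnoDB's rw_lock_t):

#include <atomic>
#include <cstdint>

class simple_rw_lock
{
  static constexpr int32_t X_BIAS= 1 << 30;
  std::atomic<int32_t> word{0};
public:
  void s_lock()
  {
    for (;;)
    {
      /* The S-latch fast path is a single fetch_add(1). */
      if (word.fetch_add(1, std::memory_order_acquire) >= 0)
        return;
      /* An X-holder made the word negative: undo and wait. */
      word.fetch_sub(1, std::memory_order_relaxed);
      while (word.load(std::memory_order_relaxed) < 0) {}
    }
  }
  void s_unlock() { word.fetch_sub(1, std::memory_order_release); }
  void x_lock()
  {
    int32_t expected= 0;
    /* Acquire only when no S- or X-holder is present. */
    while (!word.compare_exchange_weak(expected, -X_BIAS,
                                       std::memory_order_acquire,
                                       std::memory_order_relaxed))
      expected= 0;
  }
  void x_unlock() { word.fetch_add(X_BIAS, std::memory_order_release); }
};

Unlike the compare_exchange loop above, concurrent readers here never make
each other's atomic operation fail, so S-latch traffic alone cannot produce
the spurious conflicts that MDEV-22871 describes.
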