summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2023-03-16 17:09:08 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2023-03-16 17:09:08 +0200
commit9593cccf285ee348fc9a2743c1ed7d24c768439b (patch)
treee5ff5b1ea108876e817faedf06fe1743f1fde53b /storage
parent4105017a5832c3486002c4ec0c67005ceb5ab88b (diff)
downloadmariadb-git-9593cccf285ee348fc9a2743c1ed7d24c768439b.tar.gz
MDEV-26055: Improve adaptive flushing
Adaptive flushing is enabled by setting innodb_max_dirty_pages_pct_lwm>0 (not default) and innodb_adaptive_flushing=ON (default). There is also the parameter innodb_adaptive_flushing_lwm (default: 10 per cent of the log capacity). It should enable some adaptive flushing even when innodb_max_dirty_pages_pct_lwm=0. That is not being changed here. This idea was first presented by Inaam Rana several years ago, and I discussed it with Jean-François Gagné at FOSDEM 2023. buf_flush_page_cleaner(): When we are not near the log capacity limit (neither buf_flush_async_lsn nor buf_flush_sync_lsn are set), also try to move clean blocks from the buf_pool.LRU list to buf_pool.free or initiate writes (but not the eviction) of dirty blocks, until the remaining I/O capacity has been consumed. buf_flush_LRU_list_batch(): Add the parameter bool evict, to specify whether dirty least recently used pages (from buf_pool.LRU) should be evicted immediately after they have been written out. Callers outside buf_flush_page_cleaner() will pass evict=true, to retain the existing behaviour. buf_do_LRU_batch(): Add the parameter bool evict. Return counts of evicted and flushed pages. buf_flush_LRU(): Add the parameter bool evict. Assume that the caller holds buf_pool.mutex and will invoke buf_dblwr.flush_buffered_writes() afterwards. buf_flush_list_holding_mutex(): A low-level variant of buf_flush_list() whose caller must hold buf_pool.mutex and invoke buf_dblwr.flush_buffered_writes() afterwards. buf_flush_wait_batch_end_acquiring_mutex(): Remove. It is enough to have buf_flush_wait_batch_end(). page_cleaner_flush_pages_recommendation(): Avoid some floating-point arithmetics. buf_flush_page(), buf_flush_check_neighbor(), buf_flush_check_neighbors(), buf_flush_try_neighbors(): Rename the parameter "bool lru" to "bool evict". buf_free_from_unzip_LRU_list_batch(): Remove the parameter. Only actual page writes will contribute towards the limit. buf_LRU_free_page(): Evict freed pages of temporary tables. buf_pool.done_free: Broadcast whenever a block is freed (and buf_pool.try_LRU_scan is set). buf_pool_t::io_buf_t::reserve(): Retry indefinitely. During the test encryption.innochecksum we easily run out of these buffers for PAGE_COMPRESSED or ENCRYPTED pages. Tested by Matthias Leich and Axel Schwenke
Diffstat (limited to 'storage')
-rw-r--r--storage/innobase/buf/buf0buf.cc194
-rw-r--r--storage/innobase/buf/buf0flu.cc392
-rw-r--r--storage/innobase/buf/buf0lru.cc61
-rw-r--r--storage/innobase/include/buf0buf.h54
-rw-r--r--storage/innobase/include/buf0flu.h13
5 files changed, 360 insertions, 354 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 47cc915a11d..510872c142e 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -408,7 +408,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
if (id.space() == SRV_TMP_SPACE_ID
&& innodb_encrypt_temporary_tables) {
slot = buf_pool.io_buf_reserve();
- ut_a(slot);
slot->allocate();
bool ok = buf_tmp_page_decrypt(slot->crypt_buf, dst_frame);
slot->release();
@@ -431,7 +430,6 @@ decompress:
}
slot = buf_pool.io_buf_reserve();
- ut_a(slot);
slot->allocate();
decompress_with_slot:
@@ -455,7 +453,6 @@ decrypt_failed:
}
slot = buf_pool.io_buf_reserve();
- ut_a(slot);
slot->allocate();
/* decrypt using crypt_buf to dst_frame */
@@ -1293,6 +1290,41 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
return(true); /* free_list was enough */
}
+void buf_pool_t::io_buf_t::create(ulint n_slots)
+{
+ this->n_slots= n_slots;
+ slots= static_cast<buf_tmp_buffer_t*>
+ (ut_malloc_nokey(n_slots * sizeof *slots));
+ memset((void*) slots, 0, n_slots * sizeof *slots);
+}
+
+void buf_pool_t::io_buf_t::close()
+{
+ for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
+ {
+ aligned_free(s->crypt_buf);
+ aligned_free(s->comp_buf);
+ }
+ ut_free(slots);
+ slots= nullptr;
+ n_slots= 0;
+}
+
+buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve()
+{
+ for (;;)
+ {
+ for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
+ if (s->acquire())
+ return s;
+ os_aio_wait_until_no_pending_writes();
+ for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
+ if (s->acquire())
+ return s;
+ os_aio_wait_until_no_pending_reads();
+ }
+}
+
/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status
to the specified string. The format and the following parameters are the
same as the ones used for printf(3).
@@ -1359,21 +1391,23 @@ inline bool buf_pool_t::withdraw_blocks()
block = next_block;
}
- mysql_mutex_unlock(&mutex);
/* reserve free_list length */
if (UT_LIST_GET_LEN(withdraw) < withdraw_target) {
buf_flush_LRU(
std::max<ulint>(withdraw_target
- UT_LIST_GET_LEN(withdraw),
- srv_LRU_scan_depth));
- buf_flush_wait_batch_end_acquiring_mutex(true);
+ srv_LRU_scan_depth),
+ true);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ buf_dblwr.flush_buffered_writes();
+ mysql_mutex_lock(&buf_pool.mutex);
+ buf_flush_wait_batch_end(true);
}
/* relocate blocks/buddies in withdrawn area */
ulint count2 = 0;
- mysql_mutex_lock(&mutex);
buf_pool_mutex_exit_forbid();
for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage;
bpage; bpage = next_bpage) {
@@ -2380,11 +2414,6 @@ buf_page_get_low(
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_SX_LATCH)
|| (rw_latch == RW_NO_LATCH));
- ut_ad(!allow_ibuf_merge
- || mode == BUF_GET
- || mode == BUF_GET_POSSIBLY_FREED
- || mode == BUF_GET_IF_IN_POOL
- || mode == BUF_GET_IF_IN_POOL_OR_WATCH);
if (err) {
*err = DB_SUCCESS;
@@ -2392,15 +2421,15 @@ buf_page_get_low(
#ifdef UNIV_DEBUG
switch (mode) {
- case BUF_PEEK_IF_IN_POOL:
+ default:
+ ut_ad(!allow_ibuf_merge);
+ ut_ad(mode == BUF_PEEK_IF_IN_POOL);
+ break;
+ case BUF_GET_POSSIBLY_FREED:
case BUF_GET_IF_IN_POOL:
/* The caller may pass a dummy page size,
because it does not really matter. */
break;
- default:
- MY_ASSERT_UNREACHABLE();
- case BUF_GET_POSSIBLY_FREED:
- break;
case BUF_GET:
case BUF_GET_IF_IN_POOL_OR_WATCH:
ut_ad(!mtr->is_freeing_tree());
@@ -2547,11 +2576,12 @@ ignore_block:
return nullptr;
}
} else if (mode != BUF_PEEK_IF_IN_POOL) {
- } else if (!mtr) {
+ } else if (!mtr) {
ut_ad(!block->page.oldest_modification());
mysql_mutex_lock(&buf_pool.mutex);
block->unfix();
+free_unfixed_block:
if (!buf_LRU_free_page(&block->page, true)) {
ut_ad(0);
}
@@ -2667,20 +2697,19 @@ wait_for_unfix:
/* Decompress the page while not holding
buf_pool.mutex. */
- auto ok = buf_zip_decompress(block, false);
- block->page.read_unfix(state);
- state = block->page.state();
- block->page.lock.x_unlock();
+ const auto ok = buf_zip_decompress(block, false);
--buf_pool.n_pend_unzip;
-
if (!ok) {
- /* FIXME: Evict the corrupted
- ROW_FORMAT=COMPRESSED page! */
-
if (err) {
*err = DB_PAGE_CORRUPTED;
}
- return nullptr;
+ mysql_mutex_lock(&buf_pool.mutex);
+ }
+ state = block->page.read_unfix(state);
+ block->page.lock.x_unlock();
+
+ if (!ok) {
+ goto free_unfixed_block;
}
}
@@ -2886,72 +2915,73 @@ buf_page_get_gen(
dberr_t* err,
bool allow_ibuf_merge)
{
- if (buf_block_t *block= recv_sys.recover(page_id))
+ buf_block_t *block= recv_sys.recover(page_id);
+ if (UNIV_LIKELY(!block))
+ return buf_page_get_low(page_id, zip_size, rw_latch,
+ guess, mode, mtr, err, allow_ibuf_merge);
+ else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
- if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
- {
- corrupted:
- if (err)
- *err= DB_CORRUPTION;
- return nullptr;
- }
- /* Recovery is a special case; we fix() before acquiring lock. */
- auto s= block->page.fix();
- ut_ad(s >= buf_page_t::FREED);
- /* The block may be write-fixed at this point because we are not
- holding a lock, but it must not be read-fixed. */
- ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
+ corrupted:
if (err)
- *err= DB_SUCCESS;
- const bool must_merge= allow_ibuf_merge &&
- ibuf_page_exists(page_id, block->zip_size());
+ *err= DB_CORRUPTION;
+ return nullptr;
+ }
+ /* Recovery is a special case; we fix() before acquiring lock. */
+ auto s= block->page.fix();
+ ut_ad(s >= buf_page_t::FREED);
+ /* The block may be write-fixed at this point because we are not
+ holding a lock, but it must not be read-fixed. */
+ ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
+ if (err)
+ *err= DB_SUCCESS;
+ const bool must_merge= allow_ibuf_merge &&
+ ibuf_page_exists(page_id, block->zip_size());
+ if (s < buf_page_t::UNFIXED)
+ {
+ got_freed_page:
+ ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
+ mysql_mutex_lock(&buf_pool.mutex);
+ block->page.unfix();
+ buf_LRU_free_page(&block->page, true);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ goto corrupted;
+ }
+ else if (must_merge &&
+ fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
+ page_is_leaf(block->page.frame))
+ {
+ block->page.lock.x_lock();
+ s= block->page.state();
+ ut_ad(s > buf_page_t::FREED);
+ ut_ad(s < buf_page_t::READ_FIX);
if (s < buf_page_t::UNFIXED)
{
- got_freed_page:
- ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
- block->page.unfix();
- goto corrupted;
+ block->page.lock.x_unlock();
+ goto got_freed_page;
}
- else if (must_merge &&
- fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
- page_is_leaf(block->page.frame))
+ else
{
- block->page.lock.x_lock();
- s= block->page.state();
- ut_ad(s > buf_page_t::FREED);
- ut_ad(s < buf_page_t::READ_FIX);
- if (s < buf_page_t::UNFIXED)
+ if (block->page.is_ibuf_exist())
+ block->page.clear_ibuf_exist();
+ if (dberr_t e=
+ ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
{
- block->page.lock.x_unlock();
- goto got_freed_page;
- }
- else
- {
- if (block->page.is_ibuf_exist())
- block->page.clear_ibuf_exist();
- if (dberr_t e=
- ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
- {
- if (err)
- *err= e;
- buf_pool.corrupted_evict(&block->page, s);
- return nullptr;
- }
+ if (err)
+ *err= e;
+ buf_pool.corrupted_evict(&block->page, s);
+ return nullptr;
}
+ }
- if (rw_latch == RW_X_LATCH)
- {
- mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
- return block;
- }
- block->page.lock.x_unlock();
+ if (rw_latch == RW_X_LATCH)
+ {
+ mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
+ return block;
}
- mtr->page_lock(block, rw_latch);
- return block;
+ block->page.lock.x_unlock();
}
-
- return buf_page_get_low(page_id, zip_size, rw_latch,
- guess, mode, mtr, err, allow_ibuf_merge);
+ mtr->page_lock(block, rw_latch);
+ return block;
}
/********************************************************************//**
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index cca921a9275..70e1595e00e 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -380,13 +380,12 @@ void buf_page_write_complete(const IORequest &request)
if (request.is_LRU())
{
buf_LRU_free_page(bpage, true);
+ buf_pool.try_LRU_scan= true;
+ pthread_cond_signal(&buf_pool.done_free);
ut_ad(buf_pool.n_flush_LRU_);
if (!--buf_pool.n_flush_LRU_)
- {
pthread_cond_broadcast(&buf_pool.done_flush_LRU);
- pthread_cond_signal(&buf_pool.done_free);
- }
}
else
{
@@ -763,17 +762,17 @@ inline void buf_pool_t::release_freed_page(buf_page_t *bpage)
}
/** Write a flushable page to a file. buf_pool.mutex must be held.
-@param lru true=buf_pool.LRU; false=buf_pool.flush_list
+@param evict whether to evict the page on write completion
@param space tablespace
@return whether the page was flushed and buf_pool.mutex was released */
-inline bool buf_page_t::flush(bool lru, fil_space_t *space)
+inline bool buf_page_t::flush(bool evict, fil_space_t *space)
{
ut_ad(in_file());
ut_ad(in_LRU_list);
ut_ad((space->purpose == FIL_TYPE_TEMPORARY) ==
(space == fil_system.temp_space));
ut_ad(space->referenced());
- ut_ad(lru || space != fil_system.temp_space);
+ ut_ad(evict || space != fil_system.temp_space);
if (!lock.u_lock_try(true))
return false;
@@ -801,7 +800,7 @@ inline bool buf_page_t::flush(bool lru, fil_space_t *space)
Thus, it cannot be relocated or removed. */
DBUG_PRINT("ib_buf", ("%s %u page %u:%u",
- lru ? "LRU" : "flush_list",
+ evict ? "LRU" : "flush_list",
id().space(), id().page_no()));
ut_d(const auto f=) zip.fix.fetch_add(WRITE_FIX - UNFIXED);
ut_ad(f >= UNFIXED);
@@ -809,7 +808,7 @@ inline bool buf_page_t::flush(bool lru, fil_space_t *space)
ut_ad(space == fil_system.temp_space
? oldest_modification() == 2
: oldest_modification() > 2);
- if (lru)
+ if (evict)
{
ut_ad(buf_pool.n_flush_LRU_ < ULINT_UNDEFINED);
buf_pool.n_flush_LRU_++;
@@ -831,7 +830,7 @@ inline bool buf_page_t::flush(bool lru, fil_space_t *space)
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
size_t orig_size;
#endif
- IORequest::Type type= lru ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC;
+ IORequest::Type type= evict ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC;
buf_tmp_buffer_t *slot= nullptr;
if (UNIV_UNLIKELY(!frame)) /* ROW_FORMAT=COMPRESSED */
@@ -875,7 +874,7 @@ inline bool buf_page_t::flush(bool lru, fil_space_t *space)
{
switch (space->chain.start->punch_hole) {
case 1:
- type= lru ? IORequest::PUNCH_LRU : IORequest::PUNCH;
+ type= evict ? IORequest::PUNCH_LRU : IORequest::PUNCH;
break;
case 2:
size= orig_size;
@@ -912,9 +911,10 @@ inline bool buf_page_t::flush(bool lru, fil_space_t *space)
/** Check whether a page can be flushed from the buf_pool.
@param id page identifier
@param fold id.fold()
-@param lru true=buf_pool.LRU; false=buf_pool.flush_list
+@param evict true=buf_pool.LRU; false=buf_pool.flush_list
@return whether the page can be flushed */
-static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru)
+static bool buf_flush_check_neighbor(const page_id_t id, ulint fold,
+ bool evict)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(fold == id.fold());
@@ -926,9 +926,9 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru)
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
return false;
- /* We avoid flushing 'non-old' blocks in an LRU flush, because the
+ /* We avoid flushing 'non-old' blocks in an eviction flush, because the
flushed blocks are soon freed */
- if (lru && !bpage->is_old())
+ if (evict && !bpage->is_old())
return false;
return bpage->oldest_modification() > 1 && bpage->ready_for_flush();
@@ -938,11 +938,11 @@ static bool buf_flush_check_neighbor(const page_id_t id, ulint fold, bool lru)
@param space tablespace
@param id page identifier of a dirty page
@param contiguous whether to consider contiguous areas of pages
-@param lru true=buf_pool.LRU; false=buf_pool.flush_list
+@param evict true=buf_pool.LRU; false=buf_pool.flush_list
@return last page number that can be flushed */
static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
page_id_t &id, bool contiguous,
- bool lru)
+ bool evict)
{
ut_ad(id.page_no() < space.size +
(space.physical_size() == 2048 ? 1
@@ -975,7 +975,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
for (page_id_t i= id - 1;; --i)
{
fold--;
- if (!buf_flush_check_neighbor(i, fold, lru))
+ if (!buf_flush_check_neighbor(i, fold, evict))
{
low= i + 1;
break;
@@ -991,7 +991,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space,
while (++i < high)
{
++fold;
- if (!buf_flush_check_neighbor(i, fold, lru))
+ if (!buf_flush_check_neighbor(i, fold, evict))
break;
}
@@ -1059,20 +1059,20 @@ and also write zeroes or punch the hole for the freed ranges of pages.
@param space tablespace
@param page_id page identifier
@param contiguous whether to consider contiguous areas of pages
-@param lru true=buf_pool.LRU; false=buf_pool.flush_list
+@param evict true=buf_pool.LRU; false=buf_pool.flush_list
@param n_flushed number of pages flushed so far in this batch
@param n_to_flush maximum number of pages we are allowed to flush
@return number of pages flushed */
static ulint buf_flush_try_neighbors(fil_space_t *space,
const page_id_t page_id,
- bool contiguous, bool lru,
+ bool contiguous, bool evict,
ulint n_flushed, ulint n_to_flush)
{
ut_ad(space->id == page_id.space());
ulint count= 0;
page_id_t id= page_id;
- page_id_t high= buf_flush_check_neighbors(*space, id, contiguous, lru);
+ page_id_t high= buf_flush_check_neighbors(*space, id, contiguous, evict);
ut_ad(page_id >= id);
ut_ad(page_id < high);
@@ -1096,13 +1096,13 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
if (buf_page_t *bpage= buf_pool.page_hash.get(id, chain))
{
ut_ad(bpage->in_file());
- /* We avoid flushing 'non-old' blocks in an LRU flush,
+ /* We avoid flushing 'non-old' blocks in an eviction flush,
because the flushed blocks are soon freed */
- if (!lru || id == page_id || bpage->is_old())
+ if (!evict || id == page_id || bpage->is_old())
{
if (!buf_pool.watch_is_sentinel(*bpage) &&
bpage->oldest_modification() > 1 && bpage->ready_for_flush() &&
- bpage->flush(lru, space))
+ bpage->flush(evict, space))
{
++count;
continue;
@@ -1128,12 +1128,8 @@ This utility moves the uncompressed frames of pages to the free list.
Note that this function does not actually flush any data to disk. It
just detaches the uncompressed frames from the compressed pages at the
tail of the unzip_LRU and puts those freed frames in the free list.
-Note that it is a best effort attempt and it is not guaranteed that
-after a call to this function there will be 'max' blocks in the free
-list.
-@param[in] max desired number of blocks in the free_list
@return number of blocks moved to the free list. */
-static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
+static ulint buf_free_from_unzip_LRU_list_batch()
{
ulint scanned = 0;
ulint count = 0;
@@ -1143,7 +1139,6 @@ static ulint buf_free_from_unzip_LRU_list_batch(ulint max)
buf_block_t* block = UT_LIST_GET_LAST(buf_pool.unzip_LRU);
while (block
- && count < max
&& UT_LIST_GET_LEN(buf_pool.free) < srv_LRU_scan_depth
&& UT_LIST_GET_LEN(buf_pool.unzip_LRU)
> UT_LIST_GET_LEN(buf_pool.LRU) / 10) {
@@ -1214,10 +1209,13 @@ static void buf_flush_discard_page(buf_page_t *bpage)
buf_LRU_free_page(bpage, true);
}
-/** Flush dirty blocks from the end of the LRU list.
-@param max maximum number of blocks to make available in buf_pool.free
-@param n counts of flushed and evicted pages */
-static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
+/** Flush dirty blocks from the end buf_pool.LRU,
+and move clean blocks to buf_pool.free.
+@param max maximum number of blocks to flush
+@param evict whether dirty pages are to be evicted after flushing them
+@param n counts of flushed and evicted pages */
+static void buf_flush_LRU_list_batch(ulint max, bool evict,
+ flush_counters_t *n)
{
ulint scanned= 0;
ulint free_limit= srv_LRU_scan_depth;
@@ -1229,6 +1227,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN
? 0 : srv_flush_neighbors;
fil_space_t *space= nullptr;
+ bool do_evict= evict;
uint32_t last_space_id= FIL_NULL;
static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency");
static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency");
@@ -1236,8 +1235,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU);
bpage &&
((UT_LIST_GET_LEN(buf_pool.LRU) > BUF_LRU_MIN_LEN &&
- UT_LIST_GET_LEN(buf_pool.free) < free_limit &&
- n->flushed + n->evicted < max) ||
+ UT_LIST_GET_LEN(buf_pool.free) < free_limit) ||
recv_recovery_is_on()); ++scanned)
{
buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage);
@@ -1257,8 +1255,8 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
}
else if (state < buf_page_t::READ_FIX)
{
- /* Block is ready for flush. Dispatch an IO request. The IO
- helper thread will put it on free list in IO completion routine. */
+ /* Block is ready for flush. Dispatch an IO request.
+ If evict=true, the page will be evicted by buf_page_write_complete(). */
const page_id_t page_id(bpage->id());
const uint32_t space_id= page_id.space();
if (!space || space->id != space_id)
@@ -1271,6 +1269,9 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
space->release();
auto p= buf_flush_space(space_id);
space= p.first;
+ /* For the temporary tablespace, LRU flushing will always
+ evict pages upon completing the write. */
+ do_evict= evict || space == fil_system.temp_space;
last_space_id= space_id;
mysql_mutex_lock(&buf_pool.mutex);
if (p.second)
@@ -1292,11 +1293,13 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
{
mysql_mutex_unlock(&buf_pool.mutex);
n->flushed+= buf_flush_try_neighbors(space, page_id, neighbors == 1,
- true, n->flushed, max);
+ do_evict, n->flushed, max);
reacquire_mutex:
mysql_mutex_lock(&buf_pool.mutex);
}
- else if (bpage->flush(true, space))
+ else if (n->flushed >= max && !recv_recovery_is_on())
+ break;
+ else if (bpage->flush(do_evict, space))
{
++n->flushed;
goto reacquire_mutex;
@@ -1324,26 +1327,20 @@ reacquire_mutex:
/** Flush and move pages from LRU or unzip_LRU list to the free list.
Whether LRU or unzip_LRU is used depends on the state of the system.
-@param max maximum number of blocks to make available in buf_pool.free
-@return number of flushed pages */
-static ulint buf_do_LRU_batch(ulint max)
+@param max maximum number of blocks to flush
+@param evict whether dirty pages are to be evicted after flushing them
+@param n counts of flushed and evicted pages */
+static void buf_do_LRU_batch(ulint max, bool evict, flush_counters_t *n)
{
- const ulint n_unzip_LRU_evicted= buf_LRU_evict_from_unzip_LRU()
- ? buf_free_from_unzip_LRU_list_batch(max)
- : 0;
- flush_counters_t n;
- n.flushed= 0;
- n.evicted= n_unzip_LRU_evicted;
- buf_flush_LRU_list_batch(max, &n);
- mysql_mutex_assert_owner(&buf_pool.mutex);
-
- if (const ulint evicted= n.evicted - n_unzip_LRU_evicted)
- buf_lru_freed_page_count+= evicted;
+ if (buf_LRU_evict_from_unzip_LRU())
+ buf_free_from_unzip_LRU_list_batch();
+ n->evicted= 0;
+ n->flushed= 0;
+ buf_flush_LRU_list_batch(max, evict, n);
- if (n.flushed)
- buf_lru_flush_page_count+= n.flushed;
-
- return n.flushed;
+ mysql_mutex_assert_owner(&buf_pool.mutex);
+ buf_lru_freed_page_count+= n->evicted;
+ buf_lru_flush_page_count+= n->flushed;
}
/** This utility flushes dirty blocks from the end of the flush_list.
@@ -1398,7 +1395,7 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
from buf_pool.flush_list must adjust the hazard pointer.
Note: A concurrent execution of buf_flush_list_space() may
- terminate this scan prematurely. The buf_pool.n_flush_list()
+ terminate this scan prematurely. The buf_pool.n_flush_list_
should prevent multiple threads from executing
buf_do_flush_list_batch() concurrently,
but buf_flush_list_space() is ignoring that. */
@@ -1505,46 +1502,53 @@ void buf_flush_wait_batch_end(bool lru)
}
/** Write out dirty blocks from buf_pool.flush_list.
+The caller must invoke buf_dblwr.flush_buffered_writes()
+after releasing buf_pool.mutex.
@param max_n wished maximum mumber of blocks flushed
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target
@return the number of processed pages
@retval 0 if a buf_pool.flush_list batch is already running */
-static ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED, lsn_t lsn= LSN_MAX)
+static ulint buf_flush_list_holding_mutex(ulint max_n= ULINT_UNDEFINED,
+ lsn_t lsn= LSN_MAX)
{
ut_ad(lsn);
+ mysql_mutex_assert_owner(&buf_pool.mutex);
- if (buf_pool.n_flush_list())
+ if (buf_pool.n_flush_list_)
return 0;
- mysql_mutex_lock(&buf_pool.mutex);
- const bool running= buf_pool.n_flush_list_ != 0;
/* FIXME: we are performing a dirty read of buf_pool.flush_list.count
while not holding buf_pool.flush_list_mutex */
- if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
+ if (!UT_LIST_GET_LEN(buf_pool.flush_list))
{
- if (!running)
- pthread_cond_broadcast(&buf_pool.done_flush_list);
- mysql_mutex_unlock(&buf_pool.mutex);
+ pthread_cond_broadcast(&buf_pool.done_flush_list);
return 0;
}
buf_pool.n_flush_list_++;
const ulint n_flushed= buf_do_flush_list_batch(max_n, lsn);
- const ulint n_flushing= --buf_pool.n_flush_list_;
-
- buf_pool.try_LRU_scan= true;
-
- mysql_mutex_unlock(&buf_pool.mutex);
-
- if (!n_flushing)
+ if (!--buf_pool.n_flush_list_)
pthread_cond_broadcast(&buf_pool.done_flush_list);
- buf_dblwr.flush_buffered_writes();
-
DBUG_PRINT("ib_buf", ("flush_list completed, " ULINTPF " pages", n_flushed));
return n_flushed;
}
+/** Write out dirty blocks from buf_pool.flush_list.
+@param max_n wished maximum mumber of blocks flushed
+@param lsn buf_pool.get_oldest_modification(LSN_MAX) target
+@return the number of processed pages
+@retval 0 if a buf_pool.flush_list batch is already running */
+static ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED,
+ lsn_t lsn= LSN_MAX)
+{
+ mysql_mutex_lock(&buf_pool.mutex);
+ ulint n= buf_flush_list_holding_mutex(max_n, lsn);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ buf_dblwr.flush_buffered_writes();
+ return n;
+}
+
/** Try to flush all the dirty pages that belong to a given tablespace.
@param space tablespace
@param n_flushed number of pages written
@@ -1642,7 +1646,7 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
buf_pool.try_LRU_scan= true;
-
+ pthread_cond_broadcast(&buf_pool.done_free);
mysql_mutex_unlock(&buf_pool.mutex);
if (acquired)
@@ -1656,43 +1660,41 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed)
return may_have_skipped;
}
-/** Write out dirty blocks from buf_pool.LRU.
+/** Write out dirty blocks from buf_pool.LRU,
+and move clean blocks to buf_pool.free.
+The caller must invoke buf_dblwr.flush_buffered_writes()
+after releasing buf_pool.mutex.
@param max_n wished maximum mumber of blocks flushed
-@return the number of processed pages
+@param evict whether to evict pages after flushing
+@return evict ? number of processed pages : number of pages written
@retval 0 if a buf_pool.LRU batch is already running */
-ulint buf_flush_LRU(ulint max_n)
+ulint buf_flush_LRU(ulint max_n, bool evict)
{
- if (buf_pool.n_flush_LRU())
- return 0;
-
- log_buffer_flush_to_disk();
+ mysql_mutex_assert_owner(&buf_pool.mutex);
- mysql_mutex_lock(&buf_pool.mutex);
- if (buf_pool.n_flush_LRU_)
+ if (evict)
{
- mysql_mutex_unlock(&buf_pool.mutex);
- return 0;
+ if (buf_pool.n_flush_LRU_)
+ return 0;
+ buf_pool.n_flush_LRU_= 1;
}
- buf_pool.n_flush_LRU_++;
- ulint n_flushed= buf_do_LRU_batch(max_n);
-
- const ulint n_flushing= --buf_pool.n_flush_LRU_;
-
- buf_pool.try_LRU_scan= true;
-
- mysql_mutex_unlock(&buf_pool.mutex);
+ flush_counters_t n;
+ buf_do_LRU_batch(max_n, evict, &n);
- if (!n_flushing)
+ if (n.evicted)
{
- pthread_cond_broadcast(&buf_pool.done_flush_LRU);
+ buf_pool.try_LRU_scan= true;
pthread_cond_signal(&buf_pool.done_free);
}
- buf_dblwr.flush_buffered_writes();
+ if (!evict)
+ return n.flushed;
- DBUG_PRINT("ib_buf", ("LRU flush completed, " ULINTPF " pages", n_flushed));
- return n_flushed;
+ if (!--buf_pool.n_flush_LRU_)
+ pthread_cond_broadcast(&buf_pool.done_flush_LRU);
+
+ return n.evicted + n.flushed;
}
/** Initiate a log checkpoint, discarding the start of the log.
@@ -1854,7 +1856,9 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
{
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ulint n_pages= buf_flush_list(srv_max_io_capacity, sync_lsn);
- buf_flush_wait_batch_end_acquiring_mutex(false);
+ mysql_mutex_lock(&buf_pool.mutex);
+ buf_flush_wait_batch_end(false);
+ mysql_mutex_unlock(&buf_pool.mutex);
if (n_pages)
{
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
@@ -1920,17 +1924,6 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious)
}
}
-/** Wait for pending flushes to complete. */
-void buf_flush_wait_batch_end_acquiring_mutex(bool lru)
-{
- if (lru ? buf_pool.n_flush_LRU() : buf_pool.n_flush_list())
- {
- mysql_mutex_lock(&buf_pool.mutex);
- buf_flush_wait_batch_end(lru);
- mysql_mutex_unlock(&buf_pool.mutex);
- }
-}
-
/** Conduct checkpoint-related flushing for innodb_flush_sync=ON,
and try to initiate checkpoints until the target is met.
@param lsn minimum value of buf_pool.get_oldest_modification(LSN_MAX) */
@@ -2042,8 +2035,9 @@ af_get_pct_for_lsn(
/ 7.5));
}
-/** This function is called approximately once every second by the
-page_cleaner thread if innodb_adaptive_flushing=ON.
+/** This function is called approximately once every second by
+buf_flush_page_cleaner() if innodb_max_dirty_pages_pct_lwm>0
+and innodb_adaptive_flushing=ON.
Based on various factors it decides if there is a need to do flushing.
@return number of pages recommended to be flushed
@param last_pages_in number of pages flushed in previous batch
@@ -2081,52 +2075,43 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
n_pages= std::min<ulint>(srv_io_capacity, dirty_blocks);
}
+func_exit:
+ page_cleaner.flush_pass++;
return n_pages;
}
sum_pages += last_pages_in;
- double time_elapsed = difftime(curr_time, prev_time);
+ const ulint time_elapsed = std::max<ulint>(curr_time - prev_time, 1);
- /* We update our variables every srv_flushing_avg_loops
+ /* We update our variables every innodb_flushing_avg_loops
iterations to smooth out transition in workload. */
if (++n_iterations >= srv_flushing_avg_loops
- || time_elapsed >= static_cast<double>(srv_flushing_avg_loops)) {
+ || time_elapsed >= srv_flushing_avg_loops) {
- if (time_elapsed < 1) {
- time_elapsed = 1;
- }
-
- avg_page_rate = static_cast<ulint>(
- ((static_cast<double>(sum_pages)
- / time_elapsed)
- + static_cast<double>(avg_page_rate)) / 2);
+ avg_page_rate = (sum_pages / time_elapsed + avg_page_rate) / 2;
/* How much LSN we have generated since last call. */
- lsn_rate = static_cast<lsn_t>(
- static_cast<double>(cur_lsn - prev_lsn)
- / time_elapsed);
+ lsn_rate = (cur_lsn - prev_lsn) / time_elapsed;
lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2;
- ulint flush_tm = page_cleaner.flush_time;
- ulint flush_pass = page_cleaner.flush_pass;
-
- page_cleaner.flush_time = 0;
- page_cleaner.flush_pass = 0;
-
- if (flush_pass) {
- flush_tm /= flush_pass;
+ if (page_cleaner.flush_pass) {
+ page_cleaner.flush_time /= page_cleaner.flush_pass;
}
- MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME, flush_tm);
- MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS, flush_pass);
-
prev_lsn = cur_lsn;
prev_time = curr_time;
- n_iterations = 0;
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_TIME,
+ page_cleaner.flush_time);
+ MONITOR_SET(MONITOR_FLUSH_ADAPTIVE_AVG_PASS,
+ page_cleaner.flush_pass);
+ page_cleaner.flush_time = 0;
+ page_cleaner.flush_pass = 0;
+
+ n_iterations = 0;
sum_pages = 0;
}
@@ -2176,7 +2161,7 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
- return(n_pages);
+ goto func_exit;
}
/** page_cleaner thread tasked with flushing dirty pages from the buffer
@@ -2205,7 +2190,7 @@ static void buf_flush_page_cleaner()
if (UNIV_UNLIKELY(lsn_limit != 0))
{
-furious_flush:
+ furious_flush:
if (UNIV_LIKELY(srv_flush_sync))
{
buf_flush_sync_for_checkpoint(lsn_limit);
@@ -2223,7 +2208,8 @@ furious_flush:
if (buf_pool.page_cleaner_idle() &&
(!UT_LIST_GET_LEN(buf_pool.flush_list) ||
srv_max_dirty_pages_pct_lwm == 0.0))
- my_cond_wait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex);
+ my_cond_wait(&buf_pool.do_flush_list,
+ &buf_pool.flush_list_mutex.m_mutex);
else
my_cond_timedwait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
@@ -2251,19 +2237,25 @@ furious_flush:
/* wake up buf_flush_wait() */
pthread_cond_broadcast(&buf_pool.done_flush_list);
}
-unemployed:
+ unemployed:
buf_flush_async_lsn= 0;
+ set_idle:
buf_pool.page_cleaner_set_idle(true);
-
- DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", continue;);
- DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", continue;);
-
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+ end_of_batch:
+ buf_dblwr.flush_buffered_writes();
+
+ do
+ {
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", continue;);
+ DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", continue;);
- if (!recv_recovery_is_on() &&
- !srv_startup_is_before_trx_rollback_phase &&
- srv_operation == SRV_OPERATION_NORMAL)
- log_checkpoint();
+ if (!recv_recovery_is_on() &&
+ !srv_startup_is_before_trx_rollback_phase &&
+ srv_operation == SRV_OPERATION_NORMAL)
+ log_checkpoint();
+ }
+ while (false);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
continue;
@@ -2314,56 +2306,40 @@ unemployed:
if (!lsn_limit)
lsn_limit= soft_lsn_limit;
- ulint n_flushed;
+ ulint n_flushed= 0, n;
if (UNIV_UNLIKELY(lsn_limit != 0))
{
- n_flushed= buf_flush_list(srv_max_io_capacity, lsn_limit);
- /* wake up buf_flush_wait() */
- pthread_cond_broadcast(&buf_pool.done_flush_list);
- goto try_checkpoint;
+ n= srv_max_io_capacity;
+ goto background_flush;
}
else if (idle_flush || !srv_adaptive_flushing)
{
- n_flushed= buf_flush_list(srv_io_capacity);
-try_checkpoint:
- if (n_flushed)
- {
- MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
- MONITOR_FLUSH_BACKGROUND_COUNT,
- MONITOR_FLUSH_BACKGROUND_PAGES,
- n_flushed);
-do_checkpoint:
- /* The periodic log_checkpoint() call here makes it harder to
- reproduce bugs in crash recovery or mariabackup --prepare, or
- in code that writes the redo log records. Omitting the call
- here should not affect correctness, because log_free_check()
- should still be invoking checkpoints when needed. */
- DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", goto next;);
- DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto next;);
-
- if (!recv_recovery_is_on() && srv_operation == SRV_OPERATION_NORMAL)
- log_checkpoint();
- }
+ n= srv_io_capacity;
+ lsn_limit= LSN_MAX;
+ background_flush:
+ mysql_mutex_lock(&buf_pool.mutex);
+ n_flushed= buf_flush_list_holding_mutex(n, lsn_limit);
+ /* wake up buf_flush_wait() */
+ pthread_cond_broadcast(&buf_pool.done_flush_list);
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+ MONITOR_FLUSH_BACKGROUND_COUNT,
+ MONITOR_FLUSH_BACKGROUND_PAGES,
+ n_flushed);
}
- else if (ulint n= page_cleaner_flush_pages_recommendation(last_pages,
- oldest_lsn,
- dirty_blocks,
- dirty_pct))
+ else if ((n= page_cleaner_flush_pages_recommendation(last_pages,
+ oldest_lsn,
+ dirty_blocks,
+ dirty_pct)) != 0)
{
- page_cleaner.flush_pass++;
const ulint tm= ut_time_ms();
- last_pages= n_flushed= buf_flush_list(n);
+ mysql_mutex_lock(&buf_pool.mutex);
+ last_pages= n_flushed= buf_flush_list_holding_mutex(n);
page_cleaner.flush_time+= ut_time_ms() - tm;
-
- if (n_flushed)
- {
- MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_PAGES,
- n_flushed);
- goto do_checkpoint;
- }
+ MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_PAGES,
+ n_flushed);
}
else if (buf_flush_async_lsn <= oldest_lsn)
{
@@ -2371,24 +2347,29 @@ do_checkpoint:
goto unemployed;
}
-#ifndef DBUG_OFF
-next:
-#endif /* !DBUG_OFF */
- mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ n= buf_flush_LRU(n >= n_flushed ? n - n_flushed : 0, false);
+ mysql_mutex_unlock(&buf_pool.mutex);
+ last_pages+= n;
+
+ if (!idle_flush)
+ goto end_of_batch;
/* when idle flushing kicks in page_cleaner is marked active.
reset it back to idle since the it was made active as part of
idle flushing stage. */
- if (idle_flush)
- buf_pool.page_cleaner_set_idle(true);
+ mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ goto set_idle;
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (srv_fast_shutdown != 2)
{
- buf_flush_wait_batch_end_acquiring_mutex(true);
- buf_flush_wait_batch_end_acquiring_mutex(false);
+ buf_dblwr.flush_buffered_writes();
+ mysql_mutex_lock(&buf_pool.mutex);
+ buf_flush_wait_batch_end(true);
+ buf_flush_wait_batch_end(false);
+ mysql_mutex_unlock(&buf_pool.mutex);
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
@@ -2444,20 +2425,23 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool()
while (buf_pool.get_oldest_modification(0))
{
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
- buf_flush_list(srv_max_io_capacity);
- if (buf_pool.n_flush_list())
+ mysql_mutex_lock(&buf_pool.mutex);
+ buf_flush_list_holding_mutex(srv_max_io_capacity);
+ if (buf_pool.n_flush_list_)
{
+ mysql_mutex_unlock(&buf_pool.mutex);
timespec abstime;
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"Waiting to flush " ULINTPF " pages",
buf_flush_list_length());
set_timespec(abstime, INNODB_EXTEND_TIMEOUT_INTERVAL / 2);
+ buf_dblwr.flush_buffered_writes();
mysql_mutex_lock(&buf_pool.mutex);
while (buf_pool.n_flush_list_)
my_cond_timedwait(&buf_pool.done_flush_list, &buf_pool.mutex.m_mutex,
&abstime);
- mysql_mutex_unlock(&buf_pool.mutex);
}
+ mysql_mutex_unlock(&buf_pool.mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
}
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 2c1f717fb9f..9fa6492d525 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -402,6 +402,7 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
&& recv_sys.apply_log_recs) {
goto flush_lru;
});
+get_mutex:
mysql_mutex_lock(&buf_pool.mutex);
got_mutex:
buf_LRU_check_size_of_non_data_objects();
@@ -490,15 +491,18 @@ not_found:
#ifndef DBUG_OFF
flush_lru:
#endif
- if (!buf_flush_LRU(innodb_lru_flush_size)) {
+ mysql_mutex_lock(&buf_pool.mutex);
+
+ if (!buf_flush_LRU(innodb_lru_flush_size, true)) {
MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
++flush_failures;
}
n_iterations++;
- mysql_mutex_lock(&buf_pool.mutex);
buf_pool.stat.LRU_waits++;
- goto got_mutex;
+ mysql_mutex_unlock(&buf_pool.mutex);
+ buf_dblwr.flush_buffered_writes();
+ goto get_mutex;
}
/** Move the LRU_old pointer so that the length of the old blocks list
@@ -807,50 +811,57 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
/* We cannot use transactional_lock_guard here,
because buf_buddy_relocate() in buf_buddy_free() could get stuck. */
hash_lock.lock();
- lsn_t oldest_modification = bpage->oldest_modification_acquire();
+ const lsn_t oldest_modification = bpage->oldest_modification_acquire();
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
/* Do not free buffer fixed and I/O-fixed blocks. */
goto func_exit;
}
- if (oldest_modification == 1) {
+ switch (oldest_modification) {
+ case 2:
+ ut_ad(id.space() == SRV_TMP_SPACE_ID);
+ ut_ad(!bpage->zip.data);
+ if (!bpage->is_freed()) {
+ goto func_exit;
+ }
+ bpage->clear_oldest_modification();
+ break;
+ case 1:
mysql_mutex_lock(&buf_pool.flush_list_mutex);
- oldest_modification = bpage->oldest_modification();
- if (oldest_modification) {
- ut_ad(oldest_modification == 1);
+ if (const lsn_t om = bpage->oldest_modification()) {
+ ut_ad(om == 1);
buf_pool.delete_from_flush_list(bpage);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ut_ad(!bpage->oldest_modification());
- oldest_modification = 0;
- }
-
- if (zip || !bpage->zip.data) {
- /* This would completely free the block. */
- /* Do not completely free dirty blocks. */
-
- if (oldest_modification) {
- goto func_exit;
+ /* fall through */
+ case 0:
+ if (zip || !bpage->zip.data || !bpage->frame) {
+ break;
}
- } else if (oldest_modification && !bpage->frame) {
-func_exit:
- hash_lock.unlock();
- return(false);
-
- } else if (bpage->frame) {
+relocate_compressed:
b = static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof *b));
ut_a(b);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
new (b) buf_page_t(*bpage);
b->frame = nullptr;
b->set_state(buf_page_t::UNFIXED + 1);
+ break;
+ default:
+ if (zip || !bpage->zip.data || !bpage->frame) {
+ /* This would completely free the block. */
+ /* Do not completely free dirty blocks. */
+func_exit:
+ hash_lock.unlock();
+ return(false);
+ }
+ goto relocate_compressed;
}
mysql_mutex_assert_owner(&buf_pool.mutex);
- DBUG_PRINT("ib_buf", ("free page %u:%u",
- id.space(), id.page_no()));
+ DBUG_PRINT("ib_buf", ("free page %u:%u", id.space(), id.page_no()));
ut_ad(bpage->can_relocate());
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 2dccdda8a2f..e79cbdadcd6 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -724,13 +724,14 @@ public:
ut_ad(s < REINIT);
}
- void read_unfix(uint32_t s)
+ uint32_t read_unfix(uint32_t s)
{
ut_ad(lock.is_write_locked());
ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1);
- ut_d(auto old_state=) zip.fix.fetch_add(s - READ_FIX);
+ uint32_t old_state= zip.fix.fetch_add(s - READ_FIX);
ut_ad(old_state >= READ_FIX);
ut_ad(old_state < WRITE_FIX);
+ return old_state + (s - READ_FIX);
}
void set_freed(uint32_t prev_state, uint32_t count= 0)
@@ -782,10 +783,10 @@ public:
inline void write_complete(bool temporary);
/** Write a flushable page to a file. buf_pool.mutex must be held.
- @param lru true=buf_pool.LRU; false=buf_pool.flush_list
+ @param evict whether to evict the page on write completion
@param space tablespace
@return whether the page was flushed and buf_pool.mutex was released */
- inline bool flush(bool lru, fil_space_t *space);
+ inline bool flush(bool evict, fil_space_t *space);
/** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
@@ -1546,9 +1547,6 @@ public:
/** broadcast when n_flush_list reaches 0; protected by mutex */
pthread_cond_t done_flush_list;
- TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; }
- TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; }
-
/** @name General fields */
/* @{ */
ulint curr_pool_size; /*!< Current pool size in bytes */
@@ -1755,7 +1753,7 @@ public:
last_activity_count= activity_count;
}
- // n_flush_LRU() + n_flush_list()
+ // n_flush_LRU_ + n_flush_list_
// is approximately COUNT(is_write_fixed()) in flush_list
unsigned freed_page_clock;/*!< a sequence number used
@@ -1785,7 +1783,8 @@ public:
UT_LIST_BASE_NODE_T(buf_page_t) free;
/*!< base node of the free
block list */
- /** signaled each time when the free list grows; protected by mutex */
+ /** signaled each time when the free list grows and
+ broadcast each time try_LRU_scan is set; protected by mutex */
pthread_cond_t done_free;
UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
@@ -1851,9 +1850,9 @@ public:
return any_pending;
}
/** @return total amount of pending I/O */
- ulint io_pending() const
+ TPOOL_SUPPRESS_TSAN ulint io_pending() const
{
- return n_pend_reads + n_flush_LRU() + n_flush_list();
+ return n_pend_reads + n_flush_LRU_ + n_flush_list_;
}
private:
@@ -1886,34 +1885,12 @@ private:
/** array of slots */
buf_tmp_buffer_t *slots;
- void create(ulint n_slots)
- {
- this->n_slots= n_slots;
- slots= static_cast<buf_tmp_buffer_t*>
- (ut_malloc_nokey(n_slots * sizeof *slots));
- memset((void*) slots, 0, n_slots * sizeof *slots);
- }
+ void create(ulint n_slots);
- void close()
- {
- for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
- {
- aligned_free(s->crypt_buf);
- aligned_free(s->comp_buf);
- }
- ut_free(slots);
- slots= nullptr;
- n_slots= 0;
- }
+ void close();
/** Reserve a buffer */
- buf_tmp_buffer_t *reserve()
- {
- for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
- if (s->acquire())
- return s;
- return nullptr;
- }
+ buf_tmp_buffer_t *reserve();
} io_buf;
/** whether resize() is in the critical path */
@@ -2002,7 +1979,10 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn)
/** Clear oldest_modification after removing from buf_pool.flush_list */
inline void buf_page_t::clear_oldest_modification()
{
- mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
+#ifdef SAFE_MUTEX
+ if (oldest_modification() != 2)
+ mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
+#endif /* SAFE_MUTEX */
ut_d(const auto s= state());
ut_ad(s >= REMOVE_HASH);
ut_ad(oldest_modification());
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index f615b856126..d71a05c0ec9 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -86,11 +86,15 @@ buf_flush_init_for_writing(
bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr)
MY_ATTRIBUTE((warn_unused_result));
-/** Write out dirty blocks from buf_pool.LRU.
+/** Write out dirty blocks from buf_pool.LRU,
+and move clean blocks to buf_pool.free.
+The caller must invoke buf_dblwr.flush_buffered_writes()
+after releasing buf_pool.mutex.
@param max_n wished maximum mumber of blocks flushed
-@return the number of processed pages
+@param evict whether to evict pages after flushing
+@return evict ? number of processed pages : number of pages written
@retval 0 if a buf_pool.LRU batch is already running */
-ulint buf_flush_LRU(ulint max_n);
+ulint buf_flush_LRU(ulint max_n, bool evict);
/** Wait until a flush batch ends.
@param lru true=buf_pool.LRU; false=buf_pool.flush_list */
@@ -131,9 +135,6 @@ inline void buf_flush_note_modification(buf_block_t *b, lsn_t start, lsn_t end)
/** Initialize page_cleaner. */
ATTRIBUTE_COLD void buf_flush_page_cleaner_init();
-/** Wait for pending flushes to complete. */
-void buf_flush_wait_batch_end_acquiring_mutex(bool lru);
-
/** Flush the buffer pool on shutdown. */
ATTRIBUTE_COLD void buf_flush_buffer_pool();