From 92772485b75045501fef4f26d0e6514e9042116b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 23 Mar 2023 12:21:48 +0200 Subject: MDEV-30911 Multi-batch recovery of ROW_FORMAT=COMPRESSED table hangs In commit d6aed21621e438e6135415c0c09fc6adfc0be368 a condition at the start of buf_read_ahead_random() was refactored. Only the caller buf_read_recv_pages() was adjusted for this. We must in fact adjust every caller and make sure that spare blocks will be allocated while crash recovery is in progress. This is the simplest fix; ideally recovery would operate on the compressed page frame. The observed recovery hang occurred because pages 0 and 3 of a tablespace were being read due to buf_page_get_gen() calls by trx_resurrect_table_locks() before the log records for these pages had been applied. In buf_page_t::read_complete() we would skip the call to recv_recover_page() because no uncompressed page frame had been allocated for the block. --- storage/innobase/buf/buf0rea.cc | 51 ++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index bbd905365ed..7dab411dabb 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -341,8 +341,17 @@ read_ahead: /* Read all the suitable blocks within the area */ buf_block_t *block= nullptr; - if (!zip_size && !(block= buf_read_acquire())) - goto no_read_ahead; + if (UNIV_LIKELY(!zip_size)) + { + allocate_block: + if (UNIV_UNLIKELY(!(block= buf_read_acquire()))) + goto no_read_ahead; + } + else if (recv_recovery_is_on()) + { + zip_size|= 1; + goto allocate_block; + } for (page_id_t i= low; i < high; ++i) { @@ -354,7 +363,8 @@ read_ahead: { count++; ut_ad(!block); - if (!zip_size && !(block= buf_read_acquire())) + if ((UNIV_LIKELY(!zip_size) || (zip_size & 1)) && + UNIV_UNLIKELY(!(block= buf_read_acquire()))) break; } } @@ -406,11 +416,17 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size, buf_block_t *block= nullptr; if (UNIV_LIKELY(!zip_size)) { + allocate_block: mysql_mutex_lock(&buf_pool.mutex); buf_LRU_stat_inc_io(); block= buf_LRU_get_free_block(have_mutex); mysql_mutex_unlock(&buf_pool.mutex); } + else if (recv_recovery_is_on()) + { + zip_size|= 1; + goto allocate_block; + } dberr_t err= buf_read_page_low(page_id, zip_size, chain, space, block, true); buf_read_release(block); @@ -436,8 +452,17 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id, } buf_block_t *block= nullptr; - if (!zip_size && !(block= buf_read_acquire())) - goto skip; + if (UNIV_LIKELY(!zip_size)) + { + allocate_block: + if (UNIV_UNLIKELY(!(block= buf_read_acquire()))) + goto skip; + } + else if (recv_recovery_is_on()) + { + zip_size|= 1; + goto allocate_block; + } if (buf_read_page_low(page_id, zip_size, chain, space, block) == DB_SUCCESS) @@ -584,8 +609,17 @@ failed: /* If we got this far, read-ahead can be sensible: do it */ buf_block_t *block= nullptr; - if (!zip_size && !(block= buf_read_acquire())) - goto fail; + if (UNIV_LIKELY(!zip_size)) + { + allocate_block: + if (UNIV_UNLIKELY(!(block= buf_read_acquire()))) + goto fail; + } + else if (recv_recovery_is_on()) + { + zip_size|= 1; + goto allocate_block; + } count= 0; for (; new_low != new_high_1; ++new_low) @@ -599,7 +633,8 @@ failed: { count++; ut_ad(!block); - if (!zip_size && !(block= buf_read_acquire())) + if ((UNIV_LIKELY(!zip_size) || (zip_size & 1)) && + UNIV_UNLIKELY(!(block= buf_read_acquire()))) break; } } -- cgit v1.2.1