diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2023-02-28 15:39:23 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2023-02-28 15:39:23 +0200 |
commit | c14a39431b211017e6809bb79c4079b38ffc3dff (patch) | |
tree | 2c4eb3e5f44272e865d4d542f93023cde9d52821 | |
parent | 57c526ffb852fb027e25fdc77173d45bdc60b8a2 (diff) | |
download | mariadb-git-c14a39431b211017e6809bb79c4079b38ffc3dff.tar.gz |
MDEV-30753 Possible corruption due to trx_purge_free_segment()
Starting with commit 0de3be8cfdfc26f5c236eaefe12d03c7b4af22c8 (MDEV-30671),
the field TRX_UNDO_NEEDS_PURGE lost its previous meaning.
The following scenario is possible:
(1) InnoDB is killed at a point in time corresponding to the durable
execution of some fseg_free_step_not_header() but not
trx_purge_remove_log_hdr().
(2) After restart, the affected pages are allocated for something else.
(3) Purge will attempt to access the newly reallocated pages when looking
for some old undo log records.
trx_purge_free_segment(): Invoke trx_purge_remove_log_hdr() as the first
thing, to be safe. If the server is killed, some pages will never be
freed. That is the lesser evil. Also, before each mtr.start(), invoke
log_free_check() to prevent ib_logfile0 overrun.
-rw-r--r-- | storage/innobase/include/log0log.inl | 1 | ||||
-rw-r--r-- | storage/innobase/trx/trx0purge.cc | 97 |
2 files changed, 39 insertions, 59 deletions
diff --git a/storage/innobase/include/log0log.inl b/storage/innobase/include/log0log.inl index d503e3ffec9..0ff8c2523d7 100644 --- a/storage/innobase/include/log0log.inl +++ b/storage/innobase/include/log0log.inl @@ -306,6 +306,7 @@ log_free_check(void) #ifdef UNIV_DEBUG static const latch_level_t latches[] = { + SYNC_REDO_RSEG, /* trx_purge_free_segment() */ SYNC_DICT, /* dict_sys.mutex during commit_try_rebuild() */ SYNC_DICT_OPERATION, /* dict_sys.latch X-latch during diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index f273903ef93..38438108480 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -345,66 +345,45 @@ static void trx_purge_remove_log_hdr(buf_block_t *rseg, buf_block_t* log, static void trx_purge_free_segment(mtr_t &mtr, trx_rseg_t* rseg, fil_addr_t hdr_addr) { - mtr.commit(); - mtr.start(); - ut_ad(mutex_own(&rseg->mutex)); - - buf_block_t* rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); - buf_block_t* block = trx_undo_page_get( - page_id_t(rseg->space->id, hdr_addr.page), &mtr); - - /* Mark the last undo log totally purged, so that if the - system crashes, the tail of the undo log will not get accessed - again. The list of pages in the undo log tail gets - inconsistent during the freeing of the segment, and therefore - purge should not try to access them again. */ - mtr.write<2,mtr_t::MAYBE_NOP>(*block, block->frame + hdr_addr.boffset - + TRX_UNDO_NEEDS_PURGE, 0U); - - while (!fseg_free_step_not_header( - TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + block->frame, &mtr)) { - mtr.commit(); - mtr.start(); - - rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); - - block = trx_undo_page_get( - page_id_t(rseg->space->id, hdr_addr.page), &mtr); - } - - /* The page list may now be inconsistent, but the length field - stored in the list base node tells us how big it was before we - started the freeing. 
*/ - - const uint32_t seg_size = flst_get_len( - TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame); - - /* We may free the undo log segment header page; it must be freed - within the same mtr as the undo log header is removed from the - history list: otherwise, in case of a database crash, the segment - could become inaccessible garbage in the file space. */ - - trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr); - - do { - - /* Here we assume that a file segment with just the header - page can be freed in a few steps, so that the buffer pool - is not flooded with bufferfixed pages: see the note in - fsp0fsp.cc. */ - - } while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + block->frame, &mtr)); - - byte* hist = TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->frame; - ut_ad(mach_read_from_4(hist) >= seg_size); - - mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size); - - ut_ad(rseg->curr_size >= seg_size); + mtr.commit(); + log_free_check(); + mtr.start(); + ut_ad(mutex_own(&rseg->mutex)); + + buf_block_t *rseg_hdr= trx_rsegf_get(rseg->space, rseg->page_no, &mtr); + buf_block_t *block= + trx_undo_page_get(page_id_t(rseg->space->id, hdr_addr.page), &mtr); + const uint32_t seg_size= + flst_get_len(TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame); + ut_ad(rseg->curr_size >= seg_size); + rseg->curr_size-= seg_size; + + trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr); + byte *hist= TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->frame; + ut_ad(mach_read_from_4(hist) >= seg_size); + mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size); + + while (!fseg_free_step_not_header(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + block->frame, &mtr)) + { + block->fix(); + mtr.commit(); + /* NOTE: If the server is killed after the log that was produced + up to this point was written, and before the log from the mtr.commit() + in our caller is written, then the pages belonging to the + undo log will become 
inaccessible garbage. + + This does not matter when using multiple innodb_undo_tablespaces; + innodb_undo_log_truncate=ON will be able to reclaim the space. */ + log_free_check(); + mtr.start(); + ut_ad(rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__)); + rw_lock_x_lock(&block->lock); + mtr_memo_push(&mtr, block, MTR_MEMO_PAGE_X_FIX); + } - rseg->curr_size -= seg_size; + while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + + block->frame, &mtr)); } /** Remove unnecessary history data from a rollback segment. |