diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2020-12-18 17:12:57 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2020-12-18 19:23:34 +0200 |
commit | 0c23e32d27a11bae6dd38703eba674ef6eeb34f4 (patch) | |
tree | 3a75bd013f7e2b5d87057fc200fe109308e3bce3 | |
parent | cd093d79f990cfa7923d3f3ca9352c4f0ee1415d (diff) | |
download | mariadb-git-0c23e32d27a11bae6dd38703eba674ef6eeb34f4.tar.gz |
MDEV-24445 Using innodb_undo_tablespaces corrupts system tablespace
In the rewrite of MDEV-8139 (based on MDEV-15528), we introduced a
wrong assumption that any persistent tablespace that is not an .ibd
file is the system tablespace. This assumption is broken when
innodb_undo_tablespaces (files undo001, undo002, ...) are being used.
By default, we have innodb_undo_tablespaces=0 (the persistent undo
log is being stored in the system tablespace).
In MDEV-15528 and MDEV-8139 we rewrote the page scrubbing logic
so that it will follow the tried-and-true write-ahead logging
protocol, first writing FREE_PAGE records and then in the page
flushing, zerofilling or hole-punching freed pages.
Unfortunately, the implementation included a wrong assumption that
that anything that is not in an .ibd file must be the system tablespace.
This wrong assumption would cause overwrites of valid data pages in
the system tablespace.
mtr_t::m_freed_in_system_tablespace: Remove.
mtr_t::m_freed_space: The tablespace associated with m_freed_pages.
buf_page_free(): Take the tablespace and page number as a parameter,
instead of taking a page identifier.
-rw-r--r-- | storage/innobase/btr/btr0btr.cc | 8 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 63 | ||||
-rw-r--r-- | storage/innobase/fsp/fsp0fsp.cc | 5 | ||||
-rw-r--r-- | storage/innobase/ibuf/ibuf0ibuf.cc | 2 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 9 | ||||
-rw-r--r-- | storage/innobase/include/mtr0log.h | 32 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.h | 35 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0mtr.cc | 31 | ||||
-rw-r--r-- | storage/innobase/trx/trx0undo.cc | 2 |
9 files changed, 83 insertions, 104 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index b2de0ad33b2..804a1ac14db 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -725,9 +725,11 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame) ? PAGE_HEADER + PAGE_BTR_SEG_LEAF : PAGE_HEADER + PAGE_BTR_SEG_TOP]; - fseg_free_page(seg_header, - index->table->space, id.page_no(), mtr); - buf_page_free(id, mtr, __FILE__, __LINE__); + fil_space_t* space= index->table->space; + const uint32_t page= id.page_no(); + + fseg_free_page(seg_header, space, page, mtr); + buf_page_free(space, page, mtr, __FILE__, __LINE__); /* The page was marked free in the allocation bitmap, but it should remain exclusively latched until mtr_t::commit() or until it diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index ab1cb06ad2b..9063013496f 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2489,52 +2489,49 @@ retry: } /** Mark the page status as FREED for the given tablespace id and -page number. If the page is not in the buffer pool then ignore it. -X-lock should be taken on the page before marking the page status -as FREED. It avoids the concurrent flushing of freed page. -Currently, this function only marks the page as FREED if it is -in buffer pool. -@param[in] page_id page id +page number. If the page is not in buffer pool then ignore it. +@param[in,out] space tablespace +@param[in] page page number @param[in,out] mtr mini-transaction @param[in] file file name @param[in] line line where called */ -void buf_page_free(const page_id_t page_id, - mtr_t *mtr, - const char *file, - unsigned line) +void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr, + const char *file, unsigned line) { ut_ad(mtr); ut_ad(mtr->is_active()); - buf_pool.stat.n_page_gets++; + if (srv_immediate_scrub_data_uncompressed +#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 + || space->is_compressed() +#endif + ) + mtr->add_freed_offset(space, page); + + buf_pool.stat.n_page_gets++; + const page_id_t page_id(space->id, page); const ulint fold= page_id.fold(); page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold); - buf_block_t *block= reinterpret_cast<buf_block_t*> - (buf_pool.page_hash_get_low(page_id, fold)); - - /* TODO: try to all this part of mtr_t::free() */ - if (srv_immediate_scrub_data_uncompressed || mtr->is_page_compressed()) - mtr->add_freed_offset(page_id); - - if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) + if (buf_block_t *block= reinterpret_cast<buf_block_t*> + (buf_pool.page_hash_get_low(page_id, fold))) { - /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE, - but avoid buf_zip_decompress() */ - hash_lock->read_unlock(); - return; - } + if (block->page.state() != BUF_BLOCK_FILE_PAGE) + /* FIXME: convert, but avoid buf_zip_decompress() */; + else + { + buf_block_buf_fix_inc(block, file, line); + ut_ad(block->page.buf_fix_count()); + hash_lock->read_unlock(); - block->fix(); - ut_ad(block->page.buf_fix_count()); - ut_ad(fsp_is_system_temporary(page_id.space()) || - rw_lock_s_lock_nowait(block->debug_latch, file, line)); + mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); + rw_lock_x_lock_inline(&block->lock, 0, file, line); + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); - mtr_memo_type_t fix_type= MTR_MEMO_PAGE_X_FIX; - rw_lock_x_lock_inline(&block->lock, 0, file, line); - mtr_memo_push(mtr, block, fix_type); + block->page.status= buf_page_t::FREED; + return; + } + } - block->page.status= buf_page_t::FREED; - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); hash_lock->read_unlock(); } diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index ae8c557b24c..5fed394b647 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -2637,9 +2637,8 @@ fseg_free_extent( for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { if (!xdes_is_free(descr, i)) { - buf_page_free( - page_id_t(space->id, first_page_in_extent + 1), - mtr, __FILE__, __LINE__); + buf_page_free(space, first_page_in_extent + 1, mtr, + __FILE__, __LINE__); } } } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 0d81f81135c..33c7df5947b 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -1986,7 +1986,7 @@ ibuf_remove_free_page(void) ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>( bitmap_page, page_id, srv_page_size, false, &mtr); - buf_page_free(page_id, &mtr, __FILE__, __LINE__); + buf_page_free(fil_system.sys_space, page_no, &mtr, __FILE__, __LINE__); ibuf_mtr_commit(&mtr); } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index a314f62f383..fbd43fa0f78 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -361,14 +361,13 @@ buf_page_release_latch( void buf_page_make_young(buf_page_t *bpage); /** Mark the page status as FREED for the given tablespace id and page number. If the page is not in buffer pool then ignore it. -@param[in] page_id page_id +@param[in,out] space tablespace +@param[in] page page number @param[in,out] mtr mini-transaction @param[in] file file name @param[in] line line where called */ -void buf_page_free(const page_id_t page_id, - mtr_t *mtr, - const char *file, - unsigned line); +void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr, + const char *file, unsigned line); /********************************************************************//** Reads the freed_page_clock of a buffer block. diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 2bcd69d8899..0d83d83b794 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -511,16 +511,18 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, @param[in,out] b buffer page */ inline void mtr_t::init(buf_block_t *b) { - if (UNIV_LIKELY_NULL(m_freed_pages)) + const page_id_t id{b->page.id()}; + ut_ad(is_named_space(id.space())); + ut_ad(!m_freed_pages == !m_freed_space); + + if (UNIV_LIKELY_NULL(m_freed_space) && + m_freed_space->id == id.space() && + m_freed_pages->remove_if_exists(b->page.id().page_no()) && + m_freed_pages->empty()) { - ut_ad(m_log_mode != MTR_LOG_ALL || - m_user_space_id == b->page.id().space()); - if (m_freed_pages->remove_if_exists(b->page.id().page_no()) && - m_freed_pages->empty()) - { - delete m_freed_pages; - m_freed_pages= nullptr; - } + delete m_freed_pages; + m_freed_pages= nullptr; + m_freed_space= nullptr; } b->page.status= buf_page_t::INIT_ON_FLUSH; @@ -540,15 +542,11 @@ inline void mtr_t::init(buf_block_t *b) @param[in] offset page offset to be freed */ inline void mtr_t::free(fil_space_t &space, uint32_t offset) { - page_id_t freed_page_id(space.id, offset); - if (m_log_mode == MTR_LOG_ALL) - m_log.close(log_write<FREE_PAGE>(freed_page_id, nullptr)); + ut_ad(is_named_space(&space)); + ut_ad(!m_freed_space || m_freed_space == &space); - ut_ad(!m_user_space || m_user_space == &space); - if (&space == fil_system.sys_space) - freed_system_tablespace_page(); - else - m_user_space= &space; + if (m_log_mode == MTR_LOG_ALL) + m_log.close(log_write<FREE_PAGE>({space.id, offset}, nullptr)); } /** Write an EXTENDED log record. diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index f8ab7cf440f..d65a830dea2 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -316,24 +316,12 @@ public: /** @return true if we are inside the change buffer code */ bool is_inside_ibuf() const { return m_inside_ibuf; } - /** Note that system tablespace page has been freed. */ - void freed_system_tablespace_page() { m_freed_in_system_tablespace= true; } - /** Note that pages has been trimed */ void set_trim_pages() { m_trim_pages= true; } /** @return true if pages has been trimed */ bool is_trim_pages() { return m_trim_pages; } - /** @return whether a page_compressed table was modified */ - bool is_page_compressed() const - { -#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) - return m_user_space && m_user_space->is_compressed(); -#else - return false; -#endif - } #ifdef UNIV_DEBUG /** Check if we are holding an rw-latch in this mini-transaction @param lock latch to search for @@ -376,12 +364,6 @@ public: /** @return the memo stack */ mtr_buf_t* get_memo() { return &m_memo; } - - /** @return true if system tablespace page has been freed */ - bool is_freed_system_tablespace_page() - { - return m_freed_in_system_tablespace; - } #endif /* UNIV_DEBUG */ /** @return true if a record was added to the mini-transaction */ @@ -587,12 +569,18 @@ public: const char *new_path= nullptr); /** Add freed page numbers to freed_pages */ - void add_freed_offset(page_id_t id) + void add_freed_offset(fil_space_t *space, uint32_t page) { - ut_ad(m_user_space == NULL || id.space() == m_user_space->id); + ut_ad(is_named_space(space)); if (!m_freed_pages) + { m_freed_pages= new range_set(); - m_freed_pages->add_value(id.page_no()); + ut_ad(!m_freed_space); + m_freed_space= space; + } + else + ut_ad(m_freed_space == space); + m_freed_pages->add_value(page); } /** Determine the added buffer fix count of a block. @@ -670,9 +658,6 @@ private: to suppress some read-ahead operations, @see ibuf_inside() */ uint16_t m_inside_ibuf:1; - /** whether the page has been freed in system tablespace */ - uint16_t m_freed_in_system_tablespace:1; - /** whether the pages has been trimmed */ uint16_t m_trim_pages:1; @@ -694,6 +679,8 @@ private: /** LSN at commit time */ lsn_t m_commit_lsn; + /** tablespace where pages have been freed */ + fil_space_t *m_freed_space= nullptr; /** set of freed page ids */ range_set *m_freed_pages= nullptr; }; diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 60c91364b15..2f29e4bff89 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -354,8 +354,10 @@ struct ReleaseBlocks void mtr_t::start() { ut_ad(!m_freed_pages); + ut_ad(!m_freed_space); MEM_UNDEFINED(this, sizeof *this); - MEM_MAKE_DEFINED(&m_freed_pages, sizeof(m_freed_pages)); + MEM_MAKE_DEFINED(&m_freed_space, sizeof m_freed_space); + MEM_MAKE_DEFINED(&m_freed_pages, sizeof m_freed_pages); ut_d(m_start= true); ut_d(m_commit= false); @@ -373,7 +375,7 @@ void mtr_t::start() ut_d(m_user_space_id= TRX_SYS_SPACE); m_user_space= nullptr; m_commit_lsn= 0; - m_freed_in_system_tablespace= m_trim_pages= false; + m_trim_pages= false; } /** Release the resources */ @@ -418,28 +420,23 @@ void mtr_t::commit() if (m_freed_pages) { ut_ad(!m_freed_pages->empty()); - fil_space_t *freed_space= m_user_space; - /* Get the freed tablespace in case of predefined tablespace */ - if (!freed_space) - { - ut_ad(is_freed_system_tablespace_page()); - freed_space= fil_system.sys_space; - } - - ut_ad(memo_contains(*freed_space)); + ut_ad(m_freed_space); + ut_ad(memo_contains(*m_freed_space)); + ut_ad(is_named_space(m_freed_space)); /* Update the last freed lsn */ - freed_space->update_last_freed_lsn(m_commit_lsn); + m_freed_space->update_last_freed_lsn(m_commit_lsn); if (!is_trim_pages()) for (const auto &range : *m_freed_pages) - freed_space->add_free_range(range); + m_freed_space->add_free_range(range); else - freed_space->clear_freed_ranges(); + m_freed_space->clear_freed_ranges(); delete m_freed_pages; m_freed_pages= nullptr; - /* Reset of m_trim_pages and m_freed_in_system_tablespace - happens in mtr_t::start() */ + /* mtr_t::start() will reset m_trim_pages */ } + else + ut_ad(!m_freed_space); m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> (ReleaseBlocks(lsns.first, m_commit_lsn, @@ -476,8 +473,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) ut_ad(!m_made_dirty); ut_ad(m_memo.size() == 0); ut_ad(!srv_read_only_mode); + ut_ad(!m_freed_space); ut_ad(!m_freed_pages); - ut_ad(!m_freed_in_system_tablespace); if (checkpoint_lsn) { byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT); diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 67e515127d0..96efa08f28d 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -629,7 +629,7 @@ trx_undo_free_page( fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER + header_block->frame, rseg->space, page_no, mtr); - buf_page_free(page_id_t(space, page_no), mtr, __FILE__, __LINE__); + buf_page_free(rseg->space, page_no, mtr, __FILE__, __LINE__); const fil_addr_t last_addr = flst_get_last( TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_block->frame); |