diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2020-06-11 22:52:47 +0530 |
---|---|---|
committer | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2020-06-12 09:17:51 +0530 |
commit | c92f7e287fc0e21dc1b181284b1f8e2139d1c331 (patch) | |
tree | 259aa446838140f83ec4d5f8fd8d6aa9301d2652 /storage | |
parent | 07d1c8567cbfe94398a9857c47fb9919cad42651 (diff) | |
download | mariadb-git-c92f7e287fc0e21dc1b181284b1f8e2139d1c331.tar.gz |
MDEV-8139 Fix Scrubbing
fil_space_t::freed_ranges: Store ranges of freed page numbers.
fil_space_t::last_freed_lsn: Store the most recent LSN of
freeing a page.
fil_space_t::freed_range_mutex: Protects freed_ranges, last_freed_lsn.
fil_space_create(): Initialize the freed_range mutex.
fil_space_free_low(): Frees the freed_range mutex.
range_set: Ranges of page numbers.
buf_page_create(): Removes the page from freed_ranges when page
is being reused.
btr_free_root(): Remove the PAGE_INDEX_ID invalidation. Because
btr_free_root() and dict_drop_index_tree() are executed in
the same atomic mini-transaction, there is no need to
invalidate the root page.
buf_release_freed_page(): Split from buf_flush_freed_page().
Skip any I/O.
buf_flush_freed_pages(): Get the freed ranges from tablespace and
write punch-hole or zeroes to the freed ranges.
buf_flush_try_neighbors(): Handles the flushing of freed ranges.
mtr_t::m_freed_ranges: Variable to store the ranges of freed pages.
mtr_t::add_freed_offset(): To add a freed page number.
mtr_t::clear_freed_ranges(): To clear the freed ranges.
mtr_t::m_freed_in_system_tablespace: Variable to indicate whether page has
been freed in system tablespace.
mtr_t::m_trim_pages: Variable to indicate whether the space has been trimmed.
mtr_t::commit(): Add the freed page and update the last freed lsn
in the tablespace and clear the tablespace freed range if space is
trimmed.
file_name_t::freed_ranges: Store the freed page ranges during recovery.
file_name_t::add_freed_page(), file_name_t::remove_freed_page(): To
add and remove freed page during recovery.
store_freed_or_init_rec(): Store or remove the freed pages while
encountering FREE_PAGE or INIT_PAGE redo log record.
recv_init_crash_recovery_spaces(): Add the freed page encountered
during recovery to respective tablespace.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/btr/btr0btr.cc | 27 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 14 | ||||
-rw-r--r-- | storage/innobase/buf/buf0flu.cc | 93 | ||||
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 5 | ||||
-rw-r--r-- | storage/innobase/fsp/fsp0fsp.cc | 23 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 6 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 232 | ||||
-rw-r--r-- | storage/innobase/include/mtr0log.h | 15 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.h | 52 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 50 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0mtr.cc | 28 | ||||
-rw-r--r-- | storage/innobase/trx/trx0purge.cc | 4 |
12 files changed, 473 insertions, 76 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index ce72bdd7ef5..03eb1e076e4 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -749,11 +749,6 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, should remain exclusively latched until mtr_t::commit() or until it is explicitly freed from the mini-transaction. */ ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); - - /* MDEV-15528 FIXME: Zero out the page after the redo log for - this mini-transaction has been durably written. - This must be done unconditionally if - srv_immediate_scrub_data_uncompressed is set. */ } /** Set the child page number in a node pointer record. @@ -959,9 +954,8 @@ have been called. In a persistent tablespace, the caller must invoke fsp_init_file_page() before mtr.commit(). @param[in,out] block index root page -@param[in,out] mtr mini-transaction -@param[in] invalidate whether to invalidate PAGE_INDEX_ID */ -static void btr_free_root(buf_block_t *block, mtr_t *mtr, bool invalidate) +@param[in,out] mtr mini-transaction */ +static void btr_free_root(buf_block_t *block, mtr_t *mtr) { ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); @@ -973,16 +967,6 @@ static void btr_free_root(buf_block_t *block, mtr_t *mtr, bool invalidate) ut_a(btr_root_fseg_validate(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, block->page.id().space())); #endif /* UNIV_BTR_DEBUG */ - if (invalidate) - { - constexpr uint16_t field= PAGE_HEADER + PAGE_INDEX_ID; - - byte *page_index_id= my_assume_aligned<2>(field + block->frame); - if (mtr->write<8,mtr_t::MAYBE_NOP>(*block, page_index_id, - BTR_FREED_INDEX_ID) && - UNIV_LIKELY_NULL(block->page.zip.data)) - memcpy_aligned<2>(&block->page.zip.data[field], page_index_id, 8); - } /* Free the entire segment in small steps. 
*/ while (!fseg_free_step(PAGE_HEADER + PAGE_BTR_SEG_TOP + block->frame, mtr)); @@ -1099,8 +1083,7 @@ btr_create( PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) { /* Not enough space for new segment, free root segment before return. */ - btr_free_root(block, mtr, - !index || !index->table->is_temporary()); + btr_free_root(block, mtr); return(FIL_NULL); } @@ -1250,7 +1233,7 @@ btr_free_if_exists( btr_free_but_not_root(root, mtr->get_log_mode()); mtr->set_named_space_id(page_id.space()); - btr_free_root(root, mtr, true); + btr_free_root(root, mtr); } /** Free an index tree in a temporary tablespace. @@ -1265,7 +1248,7 @@ void btr_free(const page_id_t page_id) if (block) { btr_free_but_not_root(block, MTR_LOG_NO_REDO); - btr_free_root(block, &mtr, false); + btr_free_root(block, &mtr); } mtr.commit(); } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 47b4eac0ed2..f3b819056d0 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2579,12 +2579,13 @@ void buf_page_free(const page_id_t page_id, buf_block_t *block= reinterpret_cast<buf_block_t*> (buf_pool.page_hash_get_low(page_id)); + if (srv_immediate_scrub_data_uncompressed || mtr->is_page_compressed()) + mtr->add_freed_offset(page_id); + if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) { /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE, but avoid buf_zip_decompress() */ - /* FIXME: If block==NULL, introduce a separate data structure - to cover freed page ranges to augment buf_flush_freed_page() */ rw_lock_s_unlock(hash_lock); return; } @@ -3793,16 +3794,20 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size, from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => FILE_PAGE (the other is buf_page_get_gen). 
-@param[in] page_id page id +@param[in,out] space space object +@param[in] offset offset of the tablespace @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] mtr mini-transaction @return pointer to the block, page bufferfixed */ buf_block_t* -buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr) +buf_page_create(fil_space_t *space, uint32_t offset, + ulint zip_size, mtr_t *mtr) { + page_id_t page_id(space->id, offset); ut_ad(mtr->is_active()); ut_ad(page_id.space() != 0 || !zip_size); + space->free_page(offset, false); buf_block_t *free_block= buf_LRU_get_free_block(false); free_block->initialise(page_id, zip_size, 1); @@ -3831,7 +3836,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr) return buf_page_get_gen(page_id, zip_size, RW_NO_LATCH, block, BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, mtr); - mutex_exit(&recv_sys.mutex); block= buf_page_get_with_no_latch(page_id, zip_size, mtr); mutex_enter(&recv_sys.mutex); diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 8306f698289..22d94762757 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -978,43 +978,22 @@ not_compressed: This function also resets the IO_FIX to IO_NONE and making the page status as NORMAL. It initiates the write to the file only after releasing the page from flush list and its associated mutex. 
-@param[in,out] bpage freed buffer page -@param[in] space tablespace object of the freed page */ -static void buf_flush_freed_page(buf_page_t *bpage, const fil_space_t &space) +@param[in,out] bpage freed buffer page */ +static void buf_release_freed_page(buf_page_t *bpage) { ut_ad(bpage->in_file()); const bool uncompressed= bpage->state() == BUF_BLOCK_FILE_PAGE; - const page_id_t page_id(bpage->id()); - const auto zip_size= bpage->zip_size(); mutex_enter(&buf_pool.mutex); bpage->set_io_fix(BUF_IO_NONE); bpage->status= buf_page_t::NORMAL; buf_flush_remove(bpage); - buf_pool.stat.n_pages_written++; - mutex_exit(&buf_pool.mutex); if (uncompressed) rw_lock_sx_unlock_gen(&reinterpret_cast<buf_block_t*>(bpage)->lock, BUF_IO_WRITE); - const bool punch_hole= -#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) - space.is_compressed() || -#endif - false; - - ut_ad(space.id == page_id.space()); - ut_ad(space.zip_size() == zip_size); - - if (punch_hole || srv_immediate_scrub_data_uncompressed) - { - fil_io_t fio= fil_io(IORequestWrite, punch_hole, page_id, zip_size, 0, - zip_size ? zip_size : srv_page_size, - const_cast<byte*>(field_ref_zero), nullptr, false, - punch_hole); - if (punch_hole && fio.node) - fio.node->space->release_for_io(); - } + buf_LRU_free_page(bpage, true); + mutex_exit(&buf_pool.mutex); } /** Write a flushable page from buf_pool to a file. @@ -1192,7 +1171,7 @@ bool buf_flush_page(buf_page_t *bpage, IORequest::flush_t flush_type, switch (status) { default: ut_ad(status == buf_page_t::FREED); - buf_flush_freed_page(bpage, *space); + buf_release_freed_page(bpage); goto done; case buf_page_t::NORMAL: use_doublewrite= space->use_doublewrite(); @@ -1322,7 +1301,64 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, return i; } -/** Flushes to disk all flushable pages within the flush area. +/** Write punch-hole or zeroes of the freed ranges when +innodb_immediate_scrub_data_uncompressed from the freed ranges. 
+@param[in] space tablespace which contains freed ranges +@param[in] freed_ranges freed ranges of the page to be flushed */ +static void buf_flush_freed_pages(fil_space_t *space) +{ + ut_ad(space != NULL); + if (!srv_immediate_scrub_data_uncompressed && !space->is_compressed()) + return; + lsn_t flush_to_disk_lsn= log_sys.get_flushed_lsn(); + + std::unique_lock<std::mutex> freed_lock(space->freed_range_mutex); + if (space->freed_ranges.empty() + || flush_to_disk_lsn < space->get_last_freed_lsn()) + { + freed_lock.unlock(); + return; + } + + range_set freed_ranges= std::move(space->freed_ranges); + freed_lock.unlock(); + const bool punch_hole= +#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) + space->is_compressed() || +#endif + false; + + for (const auto &range : freed_ranges) + { + ulint page_size= space->zip_size(); + if (!page_size) + page_size= srv_page_size; + + if (punch_hole) + { + const auto len= (range.last - range.first + 1) * page_size; + const page_id_t page_id(space->id, range.first); + fil_io_t fio= fil_io(IORequestWrite, true, page_id, space->zip_size(), + 0, len, nullptr, nullptr, false, true); + if (fio.node) + fio.node->space->release_for_io(); + } + else if (srv_immediate_scrub_data_uncompressed) + { + for (auto i= range.first; i <= range.last; i++) + { + const page_id_t page_id(space->id, i); + fil_io(IORequestWrite, false, page_id, space->zip_size(), 0, + space->zip_size() ? space->zip_size() : srv_page_size, + const_cast<byte*>(field_ref_zero), nullptr, false, false); + } + } + buf_pool.stat.n_pages_written+= (range.last - range.first + 1); + } +} + +/** Flushes to disk all flushable pages within the flush area +and also write zeroes or punch the hole for the freed ranges of pages. 
@param[in] page_id page id @param[in] flush LRU or FLUSH_LIST @param[in] n_flushed number of pages flushed so far in this batch @@ -1344,6 +1380,9 @@ buf_flush_try_neighbors( return 0; } + /* Flush the freed ranges while flushing the neighbors */ + buf_flush_freed_pages(space); + page_id_t id = page_id; page_id_t high = (srv_flush_neighbors != 1 || UT_LIST_GET_LEN(buf_pool.LRU) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 5ba5b0f703e..e3fdc393564 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1065,6 +1065,7 @@ fil_space_free_low( rw_lock_free(&space->latch); fil_space_destroy_crypt_data(&space->crypt_data); + space->~fil_space_t(); ut_free(space->name); ut_free(space); } @@ -1157,7 +1158,9 @@ fil_space_create( return(NULL); } - space = static_cast<fil_space_t*>(ut_zalloc_nokey(sizeof(*space))); + /* FIXME: if calloc() is defined as an inline function that calls + memset() or bzero(), then GCC 6 -flifetime-dse can optimize it away */ + space= new (ut_zalloc_nokey(sizeof(*space))) fil_space_t; space->id = id; space->name = mem_strdup(name); diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 2d3f6cbc2e9..94a11778beb 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -554,7 +554,7 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr) mtr_x_lock_space(space, mtr); const auto savepoint = mtr->get_savepoint(); - buf_block_t* block = buf_page_create(page_id, zip_size, mtr); + buf_block_t* block = buf_page_create(space, 0, zip_size, mtr); mtr->sx_latch_at_savepoint(savepoint, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); @@ -875,8 +875,9 @@ fsp_fill_free_list( if (i > 0) { const auto savepoint = mtr->get_savepoint(); - block= buf_page_create(page_id_t(space->id, i), - zip_size, mtr); + block= buf_page_create( + space, static_cast<uint32_t>(i), + zip_size, mtr); mtr->sx_latch_at_savepoint(savepoint, 
block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); @@ -898,8 +899,9 @@ fsp_fill_free_list( ibuf_mtr.set_named_space(space); block = buf_page_create( - page_id_t(space->id, - i + FSP_IBUF_BITMAP_OFFSET), + space, + static_cast<uint32_t>( + i + FSP_IBUF_BITMAP_OFFSET), zip_size, &ibuf_mtr); ibuf_mtr.sx_latch_at_savepoint(0, block); buf_block_dbg_add_level(block, SYNC_FSP_PAGE); @@ -1059,8 +1061,9 @@ fsp_page_create( rw_lock_type_t rw_latch, mtr_t* mtr) { - buf_block_t* block = buf_page_create(page_id_t(space->id, offset), - space->zip_size(), mtr); + buf_block_t* block = buf_page_create( + space, static_cast<uint32_t>(offset), + space->zip_size(), mtr); /* The latch may already have been acquired, so we cannot invoke mtr_t::x_latch_at_savepoint() or mtr_t::sx_latch_at_savepoint(). */ @@ -1251,7 +1254,7 @@ static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr) return; } - mtr->free(page_id_t(space->id, offset)); + mtr->free(*space, static_cast<uint32_t>(offset)); const ulint bit = offset % FSP_EXTENT_SIZE; @@ -2557,7 +2560,7 @@ fseg_free_page_low( fsp_free_extent(space, offset, mtr); } - mtr->free(page_id_t(space->id, offset)); + mtr->free(*space, static_cast<uint32_t>(offset)); } /** Free a page in a file segment. @@ -2674,7 +2677,7 @@ fseg_free_extent( for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { if (!xdes_is_free(descr, i)) { buf_page_free( - page_id_t(space->id, first_page_in_extent + 1), + page_id_t(space->id, first_page_in_extent + i), mtr, __FILE__, __LINE__); } } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 825eb7631fe..73c153cf6d7 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -340,12 +340,14 @@ buf_page_get_low( from a file even if it cannot be found in the buffer buf_pool. This is one of the functions which perform to a block a state transition NOT_USED => FILE_PAGE (the other is buf_page_get_gen). 
-@param[in] page_id page id +@param[in,out] space space object +@param[in] offset offset of the tablespace @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] mtr mini-transaction @return pointer to the block, page bufferfixed */ buf_block_t* -buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr); +buf_page_create(fil_space_t *space, uint32_t offset, + ulint zip_size, mtr_t *mtr); /********************************************************************//** Releases a compressed-only page acquired with buf_page_get_zip(). */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 62228db822f..09496a2c5ca 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -37,9 +37,8 @@ Created 10/25/1995 Heikki Tuuri #include "log0recv.h" #include "dict0types.h" #include "ilist.h" -#ifdef UNIV_LINUX -# include <set> -#endif +#include <set> +#include <mutex> struct unflushed_spaces_tag_t; struct rotation_list_tag_t; @@ -111,6 +110,175 @@ enum fil_type_t { struct fil_node_t; +/** Structure to store first and last value of range */ +struct range_t +{ + uint32_t first; + uint32_t last; +}; + +/** Sort the range based on first value of the range */ +struct range_compare +{ + bool operator() (const range_t lhs, const range_t rhs) const + { + return lhs.first < rhs.first; + } +}; + +using range_set_t= std::set<range_t, range_compare>; +/** Range to store the set of ranges of integers */ +class range_set +{ +private: + range_set_t ranges; +public: + /** Merge the current range with previous range. 
+ @param[in] range range to be merged + @param[in] prev_range range to be merged with next */ + void merge_range(range_set_t::iterator range, + range_set_t::iterator prev_range) + { + if (range->first != prev_range->last + 1) + return; + + /* Merge the current range with previous range */ + range_t new_range {prev_range->first, range->last}; + ranges.erase(prev_range); + ranges.erase(range); + ranges.emplace(new_range); + } + + /** Split the range and add two more ranges + @param[in] range range to be split + @param[in] value Value to be removed from range */ + void split_range(range_set_t::iterator range, uint32_t value) + { + range_t split1{range->first, value - 1}; + range_t split2{value + 1, range->last}; + + /* Remove the existing element */ + ranges.erase(range); + + /* Insert the two elements */ + ranges.emplace(split1); + ranges.emplace(split2); + } + + /** Remove the value with the given range + @param[in,out] range range to be changed + @param[in] value value to be removed */ + void remove_within_range(range_set_t::iterator range, uint32_t value) + { + range_t new_range{range->first, range->last}; + if (value == range->first) + { + if (range->first == range->last) + { + ranges.erase(range); + return; + } + else + new_range.first++; + } + else if (value == range->last) + new_range.last--; + else if (range->first < value && range->last > value) + return split_range(range, value); + + ranges.erase(range); + ranges.emplace(new_range); + } + + /** Remove the value from the ranges. + @param[in] value Value to be removed. 
*/ + void remove_value(uint32_t value) + { + if (ranges.empty()) + return; + range_t new_range {value, value}; + range_set_t::iterator range= ranges.lower_bound(new_range); + if (range == ranges.end()) + return remove_within_range(std::prev(range), value); + + if (range->first > value && range != ranges.begin()) + /* Iterate the previous ranges to delete */ + return remove_within_range(std::prev(range), value); + return remove_within_range(range, value); + } + /** Add the value within the existing range + @param[in] range_set::add_rangerange range to be modified + @param[in] value value to be added */ + range_set_t::iterator add_within_range(range_set_t::iterator range, + uint32_t value) + { + if (range->first <= value && range->last >= value) + return range; + + range_t new_range{range->first, range->last}; + if (range->last + 1 == value) + new_range.last++; + else if (range->first - 1 == value) + new_range.first--; + else return ranges.end(); + ranges.erase(range); + return ranges.emplace(new_range).first; + } + /** Add the range in the ranges set + @param[in] new_range range to be added */ + void add_range(range_t new_range) + { + auto r_offset= ranges.lower_bound(new_range); + auto r_begin= ranges.begin(); + auto r_end= ranges.end(); + if (!ranges.size()) + { +new_range: + ranges.emplace(new_range); + return; + } + + if (r_offset == r_end) + { + /* last range */ + if (add_within_range(std::prev(r_offset), new_range.first) == r_end) + goto new_range; + } + else if (r_offset == r_begin) + { + /* First range */ + if (add_within_range(r_offset, new_range.first) == r_end) + goto new_range; + } + else if (r_offset->first - 1 == new_range.first) + { + /* Change starting of the existing range */ + auto r_value= add_within_range(r_offset, new_range.first); + if (r_value != ranges.begin()) + merge_range(r_value, std::prev(r_value)); + } + else + { + /* previous range last_value alone */ + if (add_within_range(std::prev(r_offset), new_range.first) == r_end) + goto 
new_range; + } + } + + /** Add the value in the ranges + @param[in] value value to be added */ + void add_value(uint32_t value) + { + range_t new_range{value, value}; + add_range(new_range); + } + + ulint size() { return ranges.size(); } + void clear() { ranges.clear(); } + bool empty() const { return ranges.empty(); } + typename range_set_t::iterator begin() { return ranges.begin(); } + typename range_set_t::iterator end() { return ranges.end(); } +}; #endif /** Tablespace or log data space */ @@ -203,6 +371,16 @@ struct fil_space_t punch hole */ bool punch_hole; + /** mutex to protect freed ranges */ + std::mutex freed_range_mutex; + + /** Variables to store freed ranges. This can be used to write + zeroes/punch the hole in files. Protected by freed_mutex */ + range_set freed_ranges; + + /** Stores last page freed lsn. Protected by freed_mutex */ + lsn_t last_freed_lsn; + ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ /** @return whether the tablespace is about to be dropped */ @@ -314,6 +492,22 @@ struct fil_space_t ut_ad(0); return false; } + + /** @return last_freed_lsn */ + lsn_t get_last_freed_lsn() { return last_freed_lsn; } + /** Update last_freed_lsn */ + void update_last_freed_lsn(lsn_t lsn) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + last_freed_lsn= lsn; + } + + /** Clear all freed ranges */ + void clear_freed_ranges() + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + freed_ranges.clear(); + } #endif /* !UNIV_INNOCHECKSUM */ /** FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags; check fsp0types.h to more info about flags. */ @@ -583,6 +777,38 @@ struct fil_space_t return(ssize == 0 || !is_ibd || srv_page_size != UNIV_PAGE_SIZE_ORIG); } + +#ifndef UNIV_INNOCHECKSUM + /** Add/remove the free page in the freed ranges list. 
+ @param[in] offset page number to be added + @param[in] free true if page to be freed */ + void free_page(uint32_t offset, bool add=true) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + if (add) + return freed_ranges.add_value(offset); + + if (freed_ranges.empty()) + return; + + return freed_ranges.remove_value(offset); + } + + /** Add the range of freed pages */ + void add_free_ranges(range_set ranges) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + freed_ranges= std::move(ranges); + } + + /** Add the set of freed page ranges */ + void add_free_range(const range_t range) + { + std::lock_guard<std::mutex> freed_lock(freed_range_mutex); + freed_ranges.add_range(range); + } +#endif /*!UNIV_INNOCHECKSUM */ + }; #ifndef UNIV_INNOCHECKSUM diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index afb9456ff30..cf5f7c751ee 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -524,11 +524,19 @@ inline void mtr_t::init(buf_block_t *b) } /** Free a page. -@param id page identifier */ -inline void mtr_t::free(const page_id_t id) +@param[in] space tablespace contains page to be freed +@param[in] offset page offset to be freed */ +inline void mtr_t::free(fil_space_t &space, uint32_t offset) { + page_id_t freed_page_id(space.id, offset); if (m_log_mode == MTR_LOG_ALL) - m_log.close(log_write<FREE_PAGE>(id, nullptr)); + m_log.close(log_write<FREE_PAGE>(freed_page_id, nullptr)); + + ut_ad(!m_user_space || m_user_space == &space); + if (&space == fil_system.sys_space) + freed_system_tablespace_page(); + else + m_user_space= &space; } /** Write an EXTENDED log record. 
@@ -651,4 +659,5 @@ inline void mtr_t::trim_pages(const page_id_t id) byte *l= log_write<EXTENDED>(id, nullptr, 1, true); *l++= TRIM_PAGES; m_log.close(l); + set_trim_pages(); } diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 7cc0939d115..cd1b9bef4aa 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -312,6 +312,24 @@ public: /** @return true if we are inside the change buffer code */ bool is_inside_ibuf() const { return m_inside_ibuf; } + /** Note that system tablespace page has been freed. */ + void freed_system_tablespace_page() { m_freed_in_system_tablespace= true; } + + /** Note that pages has been trimed */ + void set_trim_pages() { m_trim_pages= true; } + + /** @return true if pages has been trimed */ + bool is_trim_pages() { return m_trim_pages; } + + /** @return whether a page_compressed table was modified */ + bool is_page_compressed() const + { +#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32) + return m_user_space && m_user_space->is_compressed(); +#else + return false; +#endif + } #ifdef UNIV_DEBUG /** Check if we are holding an rw-latch in this mini-transaction @param lock latch to search for @@ -348,6 +366,12 @@ public: /** @return the memo stack */ mtr_buf_t* get_memo() { return &m_memo; } + + /** @return true if system tablespace page has been freed */ + bool is_freed_system_tablespace_page() + { + return m_freed_in_system_tablespace; + } #endif /* UNIV_DEBUG */ /** @return true if a record was added to the mini-transaction */ @@ -470,8 +494,9 @@ public: @param[in,out] b buffer page */ void init(buf_block_t *b); /** Free a page. - @param id page identifier */ - inline void free(const page_id_t id); + @param[in] space tablespace contains page to be freed + @param[in] offset page offset to be freed */ + inline void free(fil_space_t &space, uint32_t offset); /** Write log for partly initializing a B-tree or R-tree page. 
@param block B-tree or R-tree page @param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */ @@ -551,6 +576,20 @@ public: const char *path, const char *new_path= nullptr); + /** Add freed page numbers to freed_pages */ + void add_freed_offset(page_id_t id) + { + ut_ad(m_user_space == NULL || id.space() == m_user_space->id); + m_freed_ranges.add_value(id.page_no()); + } + + /** Clear the freed pages */ + void clear_freed_ranges() + { + m_freed_ranges.clear(); + m_freed_in_system_tablespace= 0; + m_trim_pages= false; + } private: /** Log a write of a byte string to a page. @param block buffer page @@ -621,6 +660,12 @@ private: to suppress some read-ahead operations, @see ibuf_inside() */ uint16_t m_inside_ibuf:1; + /** whether the page has been freed in system tablespace */ + uint16_t m_freed_in_system_tablespace:1; + + /** whether the pages has been trimmed */ + uint16_t m_trim_pages:1; + #ifdef UNIV_DEBUG /** Persistent user tablespace associated with the mini-transaction, or 0 (TRX_SYS_SPACE) if none yet */ @@ -638,6 +683,9 @@ private: /** LSN at commit time */ lsn_t m_commit_lsn; + + /** set of freed page ids */ + range_set m_freed_ranges; }; #include "mtr0mtr.ic" diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 5c6b665a604..5dc9e70df63 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -543,11 +543,24 @@ struct file_name_t { /** FSP_SIZE of tablespace */ ulint size; + /** Freed pages of tablespace */ + range_set freed_ranges; + /** Constructor */ file_name_t(std::string name_, bool deleted) : name(std::move(name_)), space(NULL), status(deleted ? 
DELETED: NORMAL), size(0) {} + + /** Add the freed pages */ + void add_freed_page(uint32_t page_no) { freed_ranges.add_value(page_no); } + + /** Remove the freed pages */ + void remove_freed_page(uint32_t page_no) + { + if (freed_ranges.empty()) return; + freed_ranges.remove_value(page_no); + } }; /** Map of dirty tablespaces during recovery */ @@ -1764,6 +1777,34 @@ append: log_phys_t(start_lsn, lsn, l, len)); } +/** Store/remove the freed pages in fil_name_t of recv_spaces. +@param[in] page_id freed or init page_id +@param[in] freed TRUE if page is freed */ +static void store_freed_or_init_rec(page_id_t page_id, bool freed) +{ + uint32_t space_id= page_id.space(); + uint32_t page_no= page_id.page_no(); + if (is_predefined_tablespace(space_id)) + { + fil_space_t *space; + if (space_id == TRX_SYS_SPACE) + space= fil_system.sys_space; + else + space= fil_space_get(space_id); + + space->free_page(page_no, freed); + return; + } + + recv_spaces_t::iterator i= recv_spaces.lower_bound(space_id); + if (i != recv_spaces.end() && i->first == space_id) + { + if (freed) + i->second.add_freed_page(page_no); + else + i->second.remove_freed_page(page_no); + } +} /** Parse and register one mini-transaction in log_t::FORMAT_10_5. 
@param checkpoint_lsn the log sequence number of the latest checkpoint @@ -1963,6 +2004,7 @@ same_page: case INIT_PAGE: last_offset= FIL_PAGE_TYPE; free_or_init_page: + store_freed_or_init_rec(id, (b & 0x70) == FREE_PAGE); if (UNIV_UNLIKELY(rlen != 0)) goto record_corrupted; break; @@ -2531,7 +2573,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, { mtr.start(); mtr.set_log_mode(MTR_LOG_NONE); - block= buf_page_create(page_id, space->zip_size(), &mtr); + block= buf_page_create(space, page_id.page_no(), space->zip_size(), &mtr); p= recv_sys.pages.find(page_id); if (p == recv_sys.pages.end()) { @@ -3240,6 +3282,12 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace) /* The tablespace was found, and there are some redo log records for it. */ fil_names_dirty(rs.second.space); + + /* Add the freed page ranges in the respective + tablespace */ + if (!rs.second.freed_ranges.empty()) + rs.second.space->add_free_ranges( + std::move(rs.second.freed_ranges)); } else if (rs.second.name == "") { ib::error() << "Missing FILE_CREATE, FILE_DELETE" " or FILE_MODIFY before FILE_CHECKPOINT" diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 8ca0fe65f1e..32e31ee84f4 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -372,6 +372,7 @@ void mtr_t::start() ut_d(m_user_space_id= TRX_SYS_SPACE); m_user_space= nullptr; m_commit_lsn= 0; + m_freed_in_system_tablespace= m_trim_pages= false; } /** Release the resources */ @@ -381,6 +382,7 @@ inline void mtr_t::release_resources() ut_d(m_memo.for_each_block_in_reverse(CIterate<DebugCheck>())); m_log.erase(); m_memo.erase(); + clear_freed_ranges(); ut_d(m_commit= true); } @@ -413,6 +415,30 @@ void mtr_t::commit() to insert into the flush list. 
*/ log_mutex_exit(); + if (!m_freed_ranges.empty()) + { + fil_space_t *freed_space= m_user_space; + /* Get the freed tablespace in case of predefined tablespace */ + if (!freed_space) + { + ut_ad(is_freed_system_tablespace_page()); + freed_space= fil_system.sys_space; + } + + ut_ad(memo_contains(freed_space->latch, MTR_MEMO_X_LOCK)); + /* Update the last freed lsn */ + freed_space->update_last_freed_lsn(m_commit_lsn); + + for (const auto &range : m_freed_ranges) + freed_space->add_free_range(range); + } + + if (is_trim_pages()) + { + ut_ad(m_user_space != nullptr); + m_user_space->clear_freed_ranges(); + } + m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> (ReleaseBlocks(start_lsn, m_commit_lsn))); if (m_made_dirty) @@ -441,6 +467,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) ut_ad(!m_made_dirty); ut_ad(m_memo.size() == 0); ut_ad(!srv_read_only_mode); + ut_ad(m_freed_ranges.empty()); + ut_ad(!m_freed_in_system_tablespace); if (checkpoint_lsn) { byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT); diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index c37a8b98cbd..6747ada6de4 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -696,6 +696,10 @@ not_free: const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; mtr.start(); mtr_x_lock_space(purge_sys.truncate.current, &mtr); + /* Associate the undo tablespace with mtr. + During mtr::commit(), InnoDB can use the undo + tablespace object to clear all freed ranges */ + mtr.set_named_space(purge_sys.truncate.current); mtr.trim_pages(page_id_t(space.id, size)); fsp_header_init(purge_sys.truncate.current, size, &mtr); mutex_enter(&fil_system.mutex); |