summaryrefslogtreecommitdiff
path: root/storage/innobase/buf/buf0flu.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/buf/buf0flu.cc')
-rw-r--r--storage/innobase/buf/buf0flu.cc267
1 files changed, 114 insertions, 153 deletions
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index d7343fbd9ed..373f6eb4539 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -211,7 +211,7 @@ incr_flush_list_size_in_bytes(
{
ut_ad(buf_flush_list_mutex_own(buf_pool));
- buf_pool->stat.flush_list_bytes += block->page.size.physical();
+ buf_pool->stat.flush_list_bytes += block->physical_size();
ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
}
@@ -427,137 +427,44 @@ buf_flush_insert_into_flush_list(
ut_ad(buf_page_mutex_own(block));
buf_flush_list_mutex_enter(buf_pool);
-
- ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
- || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
- <= lsn));
-
- /* If we are in the recovery then we need to update the flush
- red-black tree as well. */
- if (buf_pool->flush_rbt != NULL) {
- buf_flush_list_mutex_exit(buf_pool);
- buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
- return;
- }
-
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.in_flush_list);
-
ut_d(block->page.in_flush_list = TRUE);
+ ut_ad(!block->page.oldest_modification);
block->page.oldest_modification = lsn;
-
- UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
-
+ UNIV_MEM_ASSERT_RW(block->page.zip.data
+ ? block->page.zip.data : block->frame,
+ block->physical_size());
incr_flush_list_size_in_bytes(block, buf_pool);
-#ifdef UNIV_DEBUG_VALGRIND
- void* p;
-
- if (block->page.size.is_compressed()) {
- p = block->page.zip.data;
- } else {
- p = block->frame;
- }
-
- UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
-#endif /* UNIV_DEBUG_VALGRIND */
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_skip(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_flush_list_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_block_t* block, /*!< in/out: block which is modified */
- lsn_t lsn) /*!< in: oldest modification */
-{
- buf_page_t* prev_b;
- buf_page_t* b;
-
- ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
- ut_ad(!buf_pool_mutex_own(buf_pool));
- ut_ad(log_flush_order_mutex_own());
- ut_ad(buf_page_mutex_own(block));
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- buf_flush_list_mutex_enter(buf_pool);
-
- /* The field in_LRU_list is protected by buf_pool->mutex, which
- we are not holding. However, while a block is in the flush
- list, it is dirty and cannot be discarded, not from the
- page_hash or from the LRU list. At most, the uncompressed
- page frame of a compressed block may be discarded or created
- (copying the block->page to or from a buf_page_t that is
- dynamically allocated from buf_buddy_alloc()). Because those
- transitions hold block->mutex and the flush list mutex (via
- buf_flush_relocate_on_flush_list()), there is no possibility
- of a race condition in the assertions below. */
- ut_ad(block->page.in_LRU_list);
- ut_ad(block->page.in_page_hash);
- /* buf_buddy_block_register() will take a block in the
- BUF_BLOCK_MEMORY state, not a file page. */
- ut_ad(!block->page.in_zip_hash);
-
- ut_ad(!block->page.in_flush_list);
- ut_d(block->page.in_flush_list = TRUE);
- block->page.oldest_modification = lsn;
-
-#ifdef UNIV_DEBUG_VALGRIND
- void* p;
-
- if (block->page.size.is_compressed()) {
- p = block->page.zip.data;
- } else {
- p = block->frame;
- }
-
- UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
-#endif /* UNIV_DEBUG_VALGRIND */
-
- prev_b = NULL;
-
- /* For the most part when this function is called the flush_rbt
- should not be NULL. In a very rare boundary case it is possible
- that the flush_rbt has already been freed by the recovery thread
- before the last page was hooked up in the flush_list by the
- io-handler thread. In that case we'll just do a simple
- linear search in the else block. */
- if (buf_pool->flush_rbt != NULL) {
-
- prev_b = buf_flush_insert_in_flush_rbt(&block->page);
-
- } else {
-
- b = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- while (b != NULL && b->oldest_modification
- > block->page.oldest_modification) {
-
- ut_ad(b->in_flush_list);
- prev_b = b;
- b = UT_LIST_GET_NEXT(list, b);
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+ ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
+ /* The field in_LRU_list is protected by buf_pool->mutex, which
+ we are not holding. However, while a block is in the flush
+ list, it is dirty and cannot be discarded, not from the
+ page_hash or from the LRU list. At most, the uncompressed
+ page frame of a compressed block may be discarded or created
+ (copying the block->page to or from a buf_page_t that is
+ dynamically allocated from buf_buddy_alloc()). Because those
+ transitions hold block->mutex and the flush list mutex (via
+ buf_flush_relocate_on_flush_list()), there is no possibility
+ of a race condition in the assertions below. */
+ ut_ad(block->page.in_LRU_list);
+ ut_ad(block->page.in_page_hash);
+ /* buf_buddy_block_register() will take a block in the
+ BUF_BLOCK_MEMORY state, not a file page. */
+ ut_ad(!block->page.in_zip_hash);
+
+ if (buf_page_t* prev_b =
+ buf_flush_insert_in_flush_rbt(&block->page)) {
+ UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
+ goto func_exit;
}
}
- if (prev_b == NULL) {
- UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
- } else {
- UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
- }
-
- incr_flush_list_size_in_bytes(block, buf_pool);
-
+ UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
+func_exit:
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low(buf_pool));
+ ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit(buf_pool);
@@ -686,7 +593,7 @@ buf_flush_remove(
}
/* If the flush_rbt is active then delete from there as well. */
- if (buf_pool->flush_rbt != NULL) {
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
}
@@ -694,7 +601,7 @@ buf_flush_remove(
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
- buf_pool->stat.flush_list_bytes -= bpage->size.physical();
+ buf_pool->stat.flush_list_bytes -= bpage->physical_size();
bpage->oldest_modification = 0;
@@ -754,7 +661,7 @@ buf_flush_relocate_on_flush_list(
/* If recovery is active we must swap the control blocks in
the flush_rbt as well. */
- if (buf_pool->flush_rbt != NULL) {
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
prev_b = buf_flush_insert_in_flush_rbt(dpage);
}
@@ -821,9 +728,9 @@ void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
/** Calculate the checksum of a page from compressed table and update
the page.
-@param[in,out] page page to update
-@param[in] size compressed page size
-@param[in] lsn LSN to stamp on the page */
+@param[in,out] page page to update
+@param[in] size compressed page size
+@param[in] lsn LSN to stamp on the page */
void
buf_flush_update_zip_checksum(
buf_frame_t* page,
@@ -840,18 +747,44 @@ buf_flush_update_zip_checksum(
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
}
+/** Assign the full crc32 checksum for non-compressed page.
+@param[in,out] page page to be updated */
+void buf_flush_assign_full_crc32_checksum(byte* page)
+{
+ ut_d(bool compressed = false);
+ ut_d(bool corrupted = false);
+ ut_d(const uint size = buf_page_full_crc32_size(page, &compressed,
+ &corrupted));
+ ut_ad(!compressed);
+ ut_ad(!corrupted);
+ ut_ad(size == uint(srv_page_size));
+ const ulint payload = srv_page_size - FIL_PAGE_FCRC32_CHECKSUM;
+ mach_write_to_4(page + payload, ut_crc32(page, payload));
+}
+
/** Initialize a page for writing to the tablespace.
-@param[in] block buffer block; NULL if bypassing the buffer pool
-@param[in,out] page page frame
-@param[in,out] page_zip_ compressed page, or NULL if uncompressed
-@param[in] newest_lsn newest modification LSN to the page */
+@param[in] block buffer block; NULL if bypassing
+ the buffer pool
+@param[in,out] page page frame
+@param[in,out] page_zip_ compressed page, or NULL if
+ uncompressed
+@param[in] newest_lsn newest modification LSN to the page
+@param[in] use_full_checksum whether tablespace uses full checksum */
void
buf_flush_init_for_writing(
const buf_block_t* block,
byte* page,
void* page_zip_,
- lsn_t newest_lsn)
+ lsn_t newest_lsn,
+ bool use_full_checksum)
{
+ if (block != NULL && block->frame != page) {
+ /* If page is encrypted in full crc32 format then
+ checksum stored already as a part of fil_encrypt_buf() */
+ ut_ad(use_full_checksum);
+ return;
+ }
+
ut_ad(block == NULL || block->frame == page);
ut_ad(block == NULL || page_zip_ == NULL
|| &block->page.zip == page_zip_);
@@ -900,8 +833,13 @@ buf_flush_init_for_writing(
/* Write the newest modification lsn to the page header and trailer */
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
- mach_write_to_8(page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM,
- newest_lsn);
+ if (use_full_checksum) {
+ mach_write_to_4(page + srv_page_size - FIL_PAGE_FCRC32_END_LSN,
+ (ulint) newest_lsn);
+ } else {
+ mach_write_to_8(page + srv_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ newest_lsn);
+ }
if (block && srv_page_size == 16384) {
/* The page type could be garbage in old files
@@ -967,6 +905,10 @@ buf_flush_init_for_writing(
uint32_t checksum = BUF_NO_CHECKSUM_MAGIC;
+ if (use_full_checksum) {
+ return buf_flush_assign_full_crc32_checksum(page);
+ }
+
switch (srv_checksum_algorithm_t(srv_checksum_algorithm)) {
case SRV_CHECKSUM_ALGORITHM_INNODB:
case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
@@ -979,6 +921,8 @@ buf_flush_init_for_writing(
be calculated after storing the new formula checksum. */
checksum = buf_calc_page_old_checksum(page);
break;
+ case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
case SRV_CHECKSUM_ALGORITHM_CRC32:
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
/* In other cases we write the same checksum to both fields. */
@@ -1021,7 +965,10 @@ buf_flush_write_block_low(
|| space->purpose == FIL_TYPE_TABLESPACE);
ut_ad((space->purpose == FIL_TYPE_TEMPORARY)
== (space == fil_system.temp_space));
+
page_t* frame = NULL;
+ const bool full_crc32 = space->full_crc32();
+
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(!buf_pool_mutex_own(buf_pool));
@@ -1065,7 +1012,7 @@ buf_flush_write_block_low(
mach_write_to_8(frame + FIL_PAGE_LSN,
bpage->newest_modification);
- ut_a(page_zip_verify_checksum(frame, bpage->size.physical()));
+ ut_a(page_zip_verify_checksum(frame, bpage->zip_size()));
break;
case BUF_BLOCK_FILE_PAGE:
frame = bpage->zip.data;
@@ -1073,15 +1020,23 @@ buf_flush_write_block_low(
frame = ((buf_block_t*) bpage)->frame;
}
+ byte* page = reinterpret_cast<const buf_block_t*>(bpage)->frame;
+
+ if (full_crc32) {
+ page = buf_page_encrypt(space, bpage, page);
+ frame = page;
+ }
+
buf_flush_init_for_writing(
- reinterpret_cast<const buf_block_t*>(bpage),
- reinterpret_cast<const buf_block_t*>(bpage)->frame,
+ reinterpret_cast<const buf_block_t*>(bpage), page,
bpage->zip.data ? &bpage->zip : NULL,
- bpage->newest_modification);
+ bpage->newest_modification, full_crc32);
break;
}
- frame = buf_page_encrypt_before_write(space, bpage, frame);
+ if (!full_crc32) {
+ frame = buf_page_encrypt(space, bpage, frame);
+ }
ut_ad(space->purpose == FIL_TYPE_TABLESPACE
|| space->atomic_write_supported);
@@ -1092,7 +1047,8 @@ buf_flush_write_block_low(
/* TODO: pass the tablespace to fil_io() */
fil_io(request,
- sync, bpage->id, bpage->size, 0, bpage->size.physical(),
+ sync, bpage->id, bpage->zip_size(), 0,
+ bpage->physical_size(),
frame, bpage);
} else {
ut_ad(!srv_read_only_mode);
@@ -1353,9 +1309,13 @@ buf_flush_try_neighbors(
buf_pool_t* buf_pool = buf_pool_get(page_id);
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
+ fil_space_t* space = fil_space_acquire_for_io(page_id.space());
+ if (!space) {
+ return 0;
+ }
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN
- || srv_flush_neighbors == 0) {
+ || !srv_flush_neighbors || !space->is_rotational()) {
/* If there is little space or neighbor flushing is
not enabled then just flush the victim. */
low = page_id.page_no();
@@ -1410,9 +1370,8 @@ buf_flush_try_neighbors(
}
}
- const ulint space_size = fil_space_get_size(page_id.space());
- if (high > space_size) {
- high = space_size;
+ if (high > space->size) {
+ high = space->size;
}
DBUG_PRINT("ib_buf", ("flush %u:%u..%u",
@@ -1489,6 +1448,8 @@ buf_flush_try_neighbors(
buf_pool_mutex_exit(buf_pool);
}
+ space->release_for_io();
+
if (count > 1) {
MONITOR_INC_VALUE_CUMULATIVE(
MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
@@ -3073,7 +3034,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
" See the man page of setpriority().";
}
/* Signal that setpriority() has been attempted. */
- os_event_set(recv_sys->flush_end);
+ os_event_set(recv_sys.flush_end);
#endif /* UNIV_LINUX */
do {
@@ -3081,13 +3042,13 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ulint n_flushed_lru = 0;
ulint n_flushed_list = 0;
- os_event_wait(recv_sys->flush_start);
+ os_event_wait(recv_sys.flush_start);
if (!recv_writer_thread_active) {
break;
}
- switch (recv_sys->flush_type) {
+ switch (recv_sys.flush_type) {
case BUF_FLUSH_LRU:
/* Flush pages from end of LRU if required */
pc_request(0, LSN_MAX);
@@ -3108,8 +3069,8 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ut_ad(0);
}
- os_event_reset(recv_sys->flush_start);
- os_event_set(recv_sys->flush_end);
+ os_event_reset(recv_sys.flush_start);
+ os_event_set(recv_sys.flush_end);
} while (recv_writer_thread_active);
os_event_wait(buf_flush_event);
@@ -3594,7 +3555,7 @@ buf_flush_validate_low(
/* If we are in recovery mode i.e.: flush_rbt != NULL
then each block in the flush_list must also be present
in the flush_rbt. */
- if (buf_pool->flush_rbt != NULL) {
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
rnode = rbt_first(buf_pool->flush_rbt);
}
@@ -3615,7 +3576,7 @@ buf_flush_validate_low(
|| buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0);
- if (buf_pool->flush_rbt != NULL) {
+ if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_page_t** prpage;
ut_a(rnode != NULL);