diff options
Diffstat (limited to 'storage/innobase/buf/buf0buf.cc')
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 294 |
1 files changed, 172 insertions, 122 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 96821478e60..6efa14e6791 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -53,7 +53,6 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "srv0mon.h" #include "buf0checksum.h" -#include "buf0dblwr.h" /* IMPLEMENTATION OF THE BUFFER POOL @@ -372,10 +371,6 @@ buf_get_total_list_len( buf_pool = buf_pool_from_array(i); - if (!buf_pool) { - continue; - } - *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU); *free_len += UT_LIST_GET_LEN(buf_pool->free); *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list); @@ -383,6 +378,32 @@ buf_get_total_list_len( } /********************************************************************//** +Get total list size in bytes from all buffer pools. */ +UNIV_INTERN +void +buf_get_total_list_size_in_bytes( +/*=============================*/ + buf_pools_list_size_t* buf_pools_list_size) /*!< out: list sizes + in all buffer pools */ +{ + ut_ad(buf_pools_list_size); + memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size)); + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; + + buf_pool = buf_pool_from_array(i); + /* We don't need mutex protection since this is + for statistics purpose */ + buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes; + buf_pools_list_size->unzip_LRU_bytes += + UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE; + buf_pools_list_size->flush_list_bytes += + buf_pool->stat.flush_list_bytes; + } +} + +/********************************************************************//** Get total buffer pool statistics. */ UNIV_INTERN void @@ -400,10 +421,6 @@ buf_get_total_stat( buf_pool = buf_pool_from_array(i); - if (!buf_pool) { - continue; - } - buf_stat = &buf_pool->stat; tot_stat->n_page_gets += buf_stat->n_page_gets; tot_stat->n_pages_read += buf_stat->n_pages_read; @@ -456,6 +473,8 @@ UNIV_INTERN ibool buf_page_is_corrupted( /*==================*/ + bool check_lsn, /*!< in: true if we need to check + and complain about the LSN */ const byte* read_buf, /*!< in: a database page */ ulint zip_size) /*!< in: size of compressed page; 0 for uncompressed pages */ @@ -480,14 +499,17 @@ buf_page_is_corrupted( if (recv_lsn_checks_on) { lsn_t current_lsn; - if (log_peek_lsn(¤t_lsn) - && UNIV_UNLIKELY - (current_lsn - < mach_read_from_8(read_buf + FIL_PAGE_LSN))) { + /* Since we are going to reset the page LSN during the import + phase it makes no sense to spam the log with error messages. */ + + if (check_lsn + && log_peek_lsn(¤t_lsn) + && current_lsn + < mach_read_from_8(read_buf + FIL_PAGE_LSN)) { ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: Error: page %lu log sequence number" + " InnoDB: Error: page %lu log sequence number" " " LSN_PF "\n" "InnoDB: is in the future! Current system " "log sequence number " LSN_PF ".\n" @@ -673,6 +695,8 @@ buf_page_is_corrupted( is added and not handled here */ } + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); ); + return(FALSE); } @@ -885,7 +909,7 @@ pfs_register_buffer_block( PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER); for (i = 0; i < num_to_register; i++) { - mutex_t* mutex; + ib_mutex_t* mutex; rw_lock_t* rwlock; # ifdef UNIV_PFS_MUTEX @@ -1267,7 +1291,7 @@ buf_pool_init_instance( SYNC_BUF_FLUSH_LIST); for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { - buf_pool->no_flush[i] = os_event_create(NULL); + buf_pool->no_flush[i] = os_event_create(); } buf_pool->watch = (buf_page_t*) mem_zalloc( @@ -1334,7 +1358,7 @@ buf_pool_free_instance( Creates the buffer pool. @return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ UNIV_INTERN -ulint +dberr_t buf_pool_init( /*==========*/ ulint total_size, /*!< in: size of the total pool in bytes */ @@ -1731,7 +1755,7 @@ buf_pool_watch_unset( ut_a(bpage); if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) { - mutex_t* mutex = buf_page_get_mutex(bpage); + ib_mutex_t* mutex = buf_page_get_mutex(bpage); mutex_enter(mutex); ut_a(bpage->buf_fix_count > 0); @@ -1802,34 +1826,24 @@ buf_page_make_young( } /********************************************************************//** -Sets the time of the first access of a page and moves a page to the -start of the buffer pool LRU list if it is too old. This high-level -function can be used to prevent an important page from slipping -out of the buffer pool. */ +Moves a page to the start of the buffer pool LRU list if it is too old. +This high-level function can be used to prevent an important page from +slipping out of the buffer pool. */ static void -buf_page_set_accessed_make_young( -/*=============================*/ - buf_page_t* bpage, /*!< in/out: buffer block of a +buf_page_make_young_if_needed( +/*==========================*/ + buf_page_t* bpage) /*!< in/out: buffer block of a file page */ - unsigned access_time) /*!< in: bpage->access_time - read under mutex protection, - or 0 if unknown */ { +#ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(!buf_pool_mutex_own(buf_pool)); +#endif /* UNIV_DEBUG */ ut_a(buf_page_in_file(bpage)); if (buf_page_peek_if_too_old(bpage)) { - buf_pool_mutex_enter(buf_pool); - buf_LRU_make_block_young(bpage); - buf_pool_mutex_exit(buf_pool); - } else if (!access_time) { - ulint time_ms = ut_time_ms(); - buf_pool_mutex_enter(buf_pool); - buf_page_set_accessed(bpage, time_ms); - buf_pool_mutex_exit(buf_pool); + buf_page_make_young(bpage); } } @@ -1880,7 +1894,7 @@ buf_page_set_file_page_was_freed( &hash_lock); if (bpage) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); mutex_enter(block_mutex); rw_lock_s_unlock(hash_lock); @@ -1913,7 +1927,7 @@ buf_page_reset_file_page_was_freed( bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, &hash_lock); if (bpage) { - mutex_t* block_mutex = buf_page_get_mutex(bpage); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); mutex_enter(block_mutex); rw_lock_s_unlock(hash_lock); @@ -1974,11 +1988,10 @@ buf_page_get_zip( ulint offset) /*!< in: page number */ { buf_page_t* bpage; - mutex_t* block_mutex; + ib_mutex_t* block_mutex; rw_lock_t* hash_lock; ibool discard_attempted = FALSE; ibool must_read; - unsigned access_time; buf_pool_t* buf_pool = buf_pool_get(space, offset); buf_pool->stat.n_page_gets++; @@ -2051,15 +2064,17 @@ err_exit: got_block: must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; - access_time = buf_page_is_accessed(bpage); rw_lock_s_unlock(hash_lock); #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!bpage->file_page_was_freed); #endif + + buf_page_set_accessed(bpage); + mutex_exit(block_mutex); - buf_page_set_accessed_make_young(bpage, access_time); + buf_page_make_young_if_needed(bpage); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); @@ -2372,6 +2387,28 @@ buf_block_is_uncompressed( return(buf_pointer_is_block_field_instance(buf_pool, (void*) block)); } +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/********************************************************************//** +Return true if probe is enabled. +@return true if probe enabled. */ +static +bool +buf_debug_execute_is_force_flush() +/*==============================*/ +{ + DBUG_EXECUTE_IF("ib_buf_force_flush", return(true); ); + + /* This is used during queisce testing, we want to ensure maximum + buffering by the change buffer. */ + + if (srv_ibuf_disable_background_merge) { + return(true); + } + + return(false); +} +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + /********************************************************************//** This is the general function used to get access to a database page. @return pointer to the block or NULL */ @@ -2398,7 +2435,7 @@ buf_page_get_gen( ulint fix_type; ibool must_read; rw_lock_t* hash_lock; - mutex_t* block_mutex; + ib_mutex_t* block_mutex; buf_page_t* hash_bpage; ulint retries = 0; buf_pool_t* buf_pool = buf_pool_get(space, offset); @@ -2666,27 +2703,38 @@ wait_until_unfixed: block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock_func(&block->lock, 0, file, line); + rw_lock_x_lock_inline(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); rw_lock_x_unlock(hash_lock); - mutex_exit(&block->mutex); - mutex_exit(&buf_pool->zip_mutex); - buf_pool->n_pend_unzip++; + buf_pool->n_pend_unzip++; buf_pool_mutex_exit(buf_pool); + access_time = buf_page_is_accessed(&block->page); + mutex_exit(&block->mutex); + mutex_exit(&buf_pool->zip_mutex); + buf_page_free_descriptor(bpage); - /* Decompress the page and apply buffered operations - while not holding buf_pool->mutex or block->mutex. */ + /* Decompress the page while not holding + buf_pool->mutex or block->mutex. */ - ut_a(buf_zip_decompress(block, TRUE)); + /* Page checksum verification is already done when + the page is read from disk. Hence page checksum + verification is not necessary when decompressing the page. */ + ut_a(buf_zip_decompress(block, FALSE)); if (UNIV_LIKELY(!recv_no_ibuf_operations)) { - ibuf_merge_or_delete_for_page(block, space, offset, - zip_size, TRUE); + if (access_time) { +#ifdef UNIV_IBUF_COUNT_DEBUG + ut_a(ibuf_count_get(space, offset) == 0); +#endif /* UNIV_IBUF_COUNT_DEBUG */ + } else { + ibuf_merge_or_delete_for_page( + block, space, offset, zip_size, TRUE); + } } /* Unfix and unlatch the block. */ @@ -2723,8 +2771,9 @@ wait_until_unfixed: UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page); #endif #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG + if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH) - && ibuf_debug) { + && (ibuf_debug || buf_debug_execute_is_force_flush())) { /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. */ @@ -2759,19 +2808,18 @@ wait_until_unfixed: buf_pool, space, offset, fold); } - if (UNIV_LIKELY_NULL(block)) { - block_mutex = buf_page_get_mutex( - &block->page); - /* The page entered the buffer - pool for some reason. Try to - evict it again. */ - mutex_enter(block_mutex); - rw_lock_x_unlock(hash_lock); + rw_lock_x_unlock(hash_lock); - goto got_block; + if (UNIV_LIKELY_NULL(block)) { + /* Either the page has been read in or + a watch was set on that in the window + where we released the buf_pool::mutex + and before we acquire the hash_lock + above. Try again. */ + guess = block; + goto loop; } - rw_lock_x_unlock(hash_lock); fprintf(stderr, "innodb_change_buffering_debug evict %u %u\n", (unsigned) space, (unsigned) offset); @@ -2799,14 +2847,15 @@ wait_until_unfixed: ut_a(mode == BUF_GET_POSSIBLY_FREED || !block->page.file_page_was_freed); #endif - mutex_exit(&block->mutex); - /* Check if this is the first access to the page */ - access_time = buf_page_is_accessed(&block->page); - if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) { - buf_page_set_accessed_make_young(&block->page, access_time); + buf_page_set_accessed(&block->page); + + mutex_exit(&block->mutex); + + if (mode != BUF_PEEK_IF_IN_POOL) { + buf_page_make_young_if_needed(&block->page); } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG @@ -2842,14 +2891,14 @@ wait_until_unfixed: break; case RW_S_LATCH: - rw_lock_s_lock_func(&(block->lock), 0, file, line); + rw_lock_s_lock_inline(&(block->lock), 0, file, line); fix_type = MTR_MEMO_PAGE_S_FIX; break; default: ut_ad(rw_latch == RW_X_LATCH); - rw_lock_x_lock_func(&(block->lock), 0, file, line); + rw_lock_x_lock_inline(&(block->lock), 0, file, line); fix_type = MTR_MEMO_PAGE_X_FIX; break; @@ -2857,7 +2906,7 @@ wait_until_unfixed: mtr_memo_push(mtr, block, fix_type); - if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) { + if (mode != BUF_PEEK_IF_IN_POOL && !access_time) { /* In the case of a first access, try to apply linear read-ahead */ @@ -2912,15 +2961,13 @@ buf_page_optimistic_get( buf_block_buf_fix_inc(block, file, line); - mutex_exit(&block->mutex); + access_time = buf_page_is_accessed(&block->page); - /* Check if this is the first access to the page. - We do a dirty read on purpose, to avoid mutex contention. - This field is only used for heuristic purposes; it does not - affect correctness. */ + buf_page_set_accessed(&block->page); - access_time = buf_page_is_accessed(&block->page); - buf_page_set_accessed_make_young(&block->page, access_time); + mutex_exit(&block->mutex); + + buf_page_make_young_if_needed(&block->page); ut_ad(!ibuf_inside(mtr) || ibuf_page(buf_block_get_space(block), @@ -2932,8 +2979,8 @@ buf_page_optimistic_get( file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock), - file, line); + success = rw_lock_x_lock_func_nowait_inline(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -2975,7 +3022,7 @@ buf_page_optimistic_get( mutex_exit(&block->mutex); #endif - if (UNIV_UNLIKELY(!access_time)) { + if (!access_time) { /* In the case of a first access, try to apply linear read-ahead */ @@ -3038,24 +3085,14 @@ buf_page_get_known_nowait( buf_block_buf_fix_inc(block, file, line); + buf_page_set_accessed(&block->page); + mutex_exit(&block->mutex); buf_pool = buf_pool_from_block(block); - if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) { - buf_pool_mutex_enter(buf_pool); - buf_LRU_make_block_young(&block->page); - buf_pool_mutex_exit(buf_pool); - } else if (!buf_page_is_accessed(&block->page)) { - /* Above, we do a dirty read on purpose, to avoid - mutex contention. The field buf_page_t::access_time - is only used for heuristic purposes. Writes to the - field must be protected by mutex, however. */ - ulint time_ms = ut_time_ms(); - - buf_pool_mutex_enter(buf_pool); - buf_page_set_accessed(&block->page, time_ms); - buf_pool_mutex_exit(buf_pool); + if (mode == BUF_MAKE_YOUNG) { + buf_page_make_young_if_needed(&block->page); } ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD); @@ -3065,8 +3102,8 @@ buf_page_get_known_nowait( file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock), - file, line); + success = rw_lock_x_lock_func_nowait_inline(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -3167,8 +3204,8 @@ buf_page_try_get_func( S-latch. */ fix_type = MTR_MEMO_PAGE_X_FIX; - success = rw_lock_x_lock_func_nowait(&block->lock, - file, line); + success = rw_lock_x_lock_func_nowait_inline(&block->lock, + file, line); } if (!success) { @@ -3234,6 +3271,7 @@ buf_page_init( ulint offset, /*!< in: offset of the page within space in units of a page */ ulint fold, /*!< in: buf_page_address_fold(space,offset) */ + ulint zip_size,/*!< in: compressed page size, or 0 */ buf_block_t* block) /*!< in/out: block to init */ { buf_page_t* hash_page; @@ -3302,6 +3340,9 @@ buf_page_init( ut_d(block->page.in_page_hash = TRUE); HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page); + if (zip_size) { + page_zip_set_size(&block->page.zip, zip_size); + } } /********************************************************************//** @@ -3318,7 +3359,7 @@ UNIV_INTERN buf_page_t* buf_page_init_for_read( /*===================*/ - ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */ + dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */ ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size, or 0 */ @@ -3407,7 +3448,7 @@ err_exit: ut_ad(buf_pool_from_bpage(bpage) == buf_pool); - buf_page_init(buf_pool, space, offset, fold, block); + buf_page_init(buf_pool, space, offset, fold, zip_size, block); rw_lock_x_unlock(hash_lock); /* The block must be put to the LRU list, to the old blocks */ @@ -3426,8 +3467,6 @@ err_exit: buf_page_set_io_fix(bpage, BUF_IO_READ); if (zip_size) { - page_zip_set_size(&block->page.zip, zip_size); - /* buf_pool->mutex may be released and reacquired by buf_buddy_alloc(). Thus, we must release block->mutex in order not to @@ -3528,7 +3567,8 @@ err_exit: rw_lock_x_unlock(hash_lock); - /* The block must be put to the LRU list, to the old blocks */ + /* The block must be put to the LRU list, to the old blocks. + The zip_size is already set into the page zip */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG buf_LRU_insert_zip_clean(bpage); @@ -3578,7 +3618,6 @@ buf_page_create( buf_block_t* block; ulint fold; buf_block_t* free_block = NULL; - ulint time_ms = ut_time_ms(); buf_pool_t* buf_pool = buf_pool_get(space, offset); rw_lock_t* hash_lock; @@ -3630,7 +3669,7 @@ buf_page_create( mutex_enter(&block->mutex); - buf_page_init(buf_pool, space, offset, fold, block); + buf_page_init(buf_pool, space, offset, fold, zip_size, block); rw_lock_x_unlock(hash_lock); @@ -3651,7 +3690,6 @@ buf_page_create( buf_page_set_io_fix(&block->page, BUF_IO_READ); rw_lock_x_lock(&block->lock); - page_zip_set_size(&block->page.zip, zip_size); mutex_exit(&block->mutex); /* buf_pool->mutex may be released and reacquired by buf_buddy_alloc(). Thus, we must release block->mutex @@ -3675,12 +3713,12 @@ buf_page_create( rw_lock_x_unlock(&block->lock); } - buf_page_set_accessed(&block->page, time_ms); - buf_pool_mutex_exit(buf_pool); mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); + buf_page_set_accessed(&block->page); + mutex_exit(&block->mutex); /* Delete possible entries for the page from the insert buffer: @@ -3849,6 +3887,8 @@ buf_mark_space_corrupt( BUF_IO_READ); } + mutex_exit(buf_page_get_mutex(bpage)); + /* Find the table with specified space id, and mark it corrupted */ if (dict_set_corrupted_by_space(space)) { buf_LRU_free_one_page(bpage); @@ -3859,7 +3899,6 @@ buf_mark_space_corrupt( ut_ad(buf_pool->n_pend_reads > 0); buf_pool->n_pend_reads--; - mutex_exit(buf_page_get_mutex(bpage)); buf_pool_mutex_exit(buf_pool); return(ret); @@ -3868,9 +3907,9 @@ buf_mark_space_corrupt( /********************************************************************//** Completes an asynchronous read or write request of a file page to or from the buffer pool. -@return TRUE if successful */ +@return true if successful */ UNIV_INTERN -ibool +bool buf_page_io_complete( /*=================*/ buf_page_t* bpage) /*!< in: pointer to the block in question */ @@ -3952,8 +3991,20 @@ buf_page_io_complete( /* From version 3.23.38 up we store the page checksum to the 4 first bytes of the page end lsn field */ - if (buf_page_is_corrupted(frame, + if (buf_page_is_corrupted(true, frame, buf_page_get_zip_size(bpage))) { + + /* Not a real corruption if it was triggered by + error injection */ + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", + if (bpage->space > TRX_SYS_SPACE + && buf_mark_space_corrupt(bpage)) { + ib_logf(IB_LOG_LEVEL_INFO, + "Simulated page corruption"); + return(true); + } + goto page_not_corrupt; + ;); corrupt: fprintf(stderr, "InnoDB: Database page corruption on disk" @@ -3997,7 +4048,7 @@ corrupt: table as corrupted instead of crashing server */ if (bpage->space > TRX_SYS_SPACE && buf_mark_space_corrupt(bpage)) { - return(FALSE); + return(false); } else { fputs("InnoDB: Ending processing" " because of" @@ -4008,6 +4059,9 @@ corrupt: } } + DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", + page_not_corrupt: bpage = bpage; ); + if (recv_recovery_is_on()) { /* Pages must be uncompressed for crash recovery. */ ut_a(uncompressed); @@ -4090,7 +4144,7 @@ corrupt: mutex_exit(buf_page_get_mutex(bpage)); buf_pool_mutex_exit(buf_pool); - return(TRUE); + return(true); } /*********************************************************************//** @@ -5118,9 +5172,7 @@ void buf_refresh_io_stats_all(void) /*==========================*/ { - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); @@ -5137,9 +5189,7 @@ ibool buf_all_freed(void) /*===============*/ { - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { + for (ulint i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_pool = buf_pool_from_array(i); |