Diffstat (limited to 'storage/innobase/buf/buf0buf.cc')
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 693
1 file changed, 321 insertions, 372 deletions
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 1e181872e87..fb911fc29f5 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -126,24 +126,15 @@ in the file along with the file page, resides in the control block. Buffer pool struct ------------------ -The buffer buf_pool contains a single mutex which protects all the +The buffer buf_pool contains several mutexes which protect all the control data structures of the buf_pool. The content of a buffer frame is protected by a separate read-write lock in its control block, though. -These locks can be locked and unlocked without owning the buf_pool.mutex. -The OS events in the buf_pool struct can be waited for without owning the -buf_pool.mutex. - -The buf_pool.mutex is a hot-spot in main memory, causing a lot of -memory bus traffic on multiprocessor systems when processors -alternately access the mutex. On our Pentium, the mutex is accessed -maybe every 10 microseconds. We gave up the solution to have mutexes -for each control block, for instance, because it seemed to be -complicated. - -A solution to reduce mutex contention of the buf_pool.mutex is to -create a separate mutex for the page hash table. On Pentium, -accessing the hash table takes 2 microseconds, about half -of the total buf_pool.mutex hold time. + +buf_pool.mutex protects the buf_pool.LRU list and buf_page_t::state; +buf_pool.free_list_mutex protects the free_list and withdraw list; +buf_pool.flush_state_mutex protects the flush state related data structures; +buf_pool.zip_free mutex protects the zip_free arrays; +buf_pool.zip_hash mutex protects the zip_hash hash and in_zip_hash flag. Control blocks -------------- @@ -158,16 +149,6 @@ The buffer frames have to be aligned so that the start memory address of a frame is divisible by the universal page size, which is a power of two. -We intend to make the buffer buf_pool size on-line reconfigurable, -that is, the buf_pool size can be changed without closing the database. -Then the database administarator may adjust it to be bigger -at night, for example. The control block array must -contain enough control blocks for the maximum buffer buf_pool size -which is used in the particular database. -If the buf_pool size is cut, we exploit the virtual memory mechanism of -the OS, and just refrain from using frames at high addresses. Then the OS -can swap them to disk. - The control blocks containing file pages are put to a hash table according to the file address of the page. 
We could speed up the access to an individual page by using @@ -1522,8 +1503,7 @@ bool buf_pool_t::create() n_chunks= srv_buf_pool_size / srv_buf_pool_chunk_unit; const size_t chunk_size= srv_buf_pool_chunk_unit; - chunks= static_cast<buf_pool_t::chunk_t*>(ut_zalloc_nokey(n_chunks * - sizeof *chunks)); + chunks= static_cast<chunk_t*>(ut_zalloc_nokey(n_chunks * sizeof *chunks)); UT_LIST_INIT(free, &buf_page_t::list); curr_size= 0; auto chunk= chunks; @@ -1555,8 +1535,12 @@ bool buf_pool_t::create() while (++chunk < chunks + n_chunks); ut_ad(is_initialised()); - mutex_create(LATCH_ID_BUF_POOL, &mutex); + mutex_create(LATCH_ID_BUF_POOL_LRU_LIST, &mutex); + mutex_create(LATCH_ID_BUF_POOL_FREE_LIST, &free_list_mutex); + mutex_create(LATCH_ID_BUF_POOL_ZIP_FREE, &zip_free_mutex); + mutex_create(LATCH_ID_BUF_POOL_ZIP_HASH, &zip_hash_mutex); mutex_create(LATCH_ID_BUF_POOL_ZIP, &zip_mutex); + mutex_create(LATCH_ID_BUF_POOL_FLUSH_STATE, &flush_state_mutex); UT_LIST_INIT(LRU, &buf_page_t::LRU); UT_LIST_INIT(withdraw, &buf_page_t::list); @@ -1610,14 +1594,9 @@ bool buf_pool_t::create() io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) * OS_AIO_N_PENDING_IOS_PER_THREAD); - /* FIXME: remove some of these variables */ - srv_buf_pool_curr_size= curr_pool_size; - srv_buf_pool_old_size= srv_buf_pool_size; - srv_buf_pool_base_size= srv_buf_pool_size; - chunk_t::map_ref= chunk_t::map_reg; buf_LRU_old_ratio_update(100 * 3 / 8, false); - btr_search_sys_create(srv_buf_pool_curr_size / sizeof(void*) / 64); + btr_search_sys_create(curr_pool_size / sizeof(void*) / 64); ut_ad(is_initialised()); return false; } @@ -1630,6 +1609,10 @@ void buf_pool_t::close() return; mutex_free(&mutex); + mutex_free(&free_list_mutex); + mutex_free(&zip_free_mutex); + mutex_free(&zip_hash_mutex); + mutex_free(&flush_state_mutex); mutex_free(&zip_mutex); mutex_free(&flush_list_mutex); @@ -1807,21 +1790,18 @@ inline bool buf_pool_t::realloc(buf_block_t *block) new_block->page.id.page_no())); rw_lock_x_unlock(hash_lock); + mutex_exit(&block->mutex); mutex_exit(&new_block->mutex); /* free block */ buf_block_set_state(block, BUF_BLOCK_MEMORY); buf_LRU_block_free_non_file_page(block); - - mutex_exit(&block->mutex); } else { rw_lock_x_unlock(hash_lock); mutex_exit(&block->mutex); /* free new_block */ - mutex_enter(&new_block->mutex); buf_LRU_block_free_non_file_page(new_block); - mutex_exit(&new_block->mutex); } return(true); /* free_list was enough */ @@ -1858,21 +1838,24 @@ inline bool buf_pool_t::withdraw_blocks() { buf_block_t* block; ulint loop_count = 0; + ulint lru_len; ib::info() << "start to withdraw the last " << withdraw_target << " blocks"; /* Minimize zip_free[i] lists */ - mutex_enter(&mutex); buf_buddy_condense_free(); + + mutex_enter(&mutex); + lru_len = UT_LIST_GET_LEN(LRU); mutex_exit(&mutex); + mutex_enter(&free_list_mutex); while (UT_LIST_GET_LEN(withdraw) < withdraw_target) { /* try to withdraw from free_list */ ulint count1 = 0; - mutex_enter(&mutex); block = reinterpret_cast<buf_block_t*>( UT_LIST_GET_FIRST(free)); while (block != NULL @@ -1887,7 +1870,7 @@ inline bool buf_pool_t::withdraw_blocks() UT_LIST_GET_NEXT( list, &block->page)); - if (buf_pool.will_be_withdrawn(block->page)) { + if (will_be_withdrawn(block->page)) { /* This should be withdrawn */ UT_LIST_REMOVE(free, &block->page); UT_LIST_ADD_LAST(withdraw, &block->page); @@ -1897,7 +1880,6 @@ inline bool buf_pool_t::withdraw_blocks() block = next_block; } - mutex_exit(&mutex); /* reserve free_list length */ if (UT_LIST_GET_LEN(withdraw) < 
withdraw_target) { @@ -1905,15 +1887,12 @@ inline bool buf_pool_t::withdraw_blocks() flush_counters_t n; /* cap scan_depth with current LRU size. */ - mutex_enter(&mutex); - scan_depth = UT_LIST_GET_LEN(LRU); - mutex_exit(&mutex); - scan_depth = ut_min( ut_max(withdraw_target - UT_LIST_GET_LEN(withdraw), static_cast<ulint>(srv_LRU_scan_depth)), - scan_depth); + lru_len); + mutex_exit(&free_list_mutex); buf_flush_do_batch(BUF_FLUSH_LRU, scan_depth, 0, &n); buf_flush_wait_batch_end(BUF_FLUSH_LRU); @@ -1925,6 +1904,9 @@ inline bool buf_pool_t::withdraw_blocks() MONITOR_LRU_BATCH_FLUSH_PAGES, n.flushed); } + } else { + + mutex_exit(&free_list_mutex); } /* relocate blocks/buddies in withdrawn area */ @@ -1946,33 +1928,27 @@ inline bool buf_pool_t::withdraw_blocks() && will_be_withdrawn(bpage->zip.data) && buf_page_can_relocate(bpage)) { mutex_exit(block_mutex); - buf_pool_mutex_exit_forbid(); if (!buf_buddy_realloc( bpage->zip.data, page_zip_get_size(&bpage->zip))) { /* failed to allocate block */ - buf_pool_mutex_exit_allow(); break; } - buf_pool_mutex_exit_allow(); mutex_enter(block_mutex); count2++; } if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE - && buf_pool.will_be_withdrawn(*bpage)) { + && will_be_withdrawn(*bpage)) { if (buf_page_can_relocate(bpage)) { mutex_exit(block_mutex); - buf_pool_mutex_exit_forbid(); if (!realloc( reinterpret_cast<buf_block_t*>( bpage))) { /* failed to allocate block */ - buf_pool_mutex_exit_allow(); break; } - buf_pool_mutex_exit_allow(); count2++; } else { mutex_exit(block_mutex); @@ -1985,8 +1961,16 @@ inline bool buf_pool_t::withdraw_blocks() bpage = next_bpage; } + mutex_exit(&mutex); + if (++loop_count >= 10) { + ib::info() << "will retry to withdraw later"; + return true; + } + + mutex_enter(&free_list_mutex); + buf_resize_status( "withdrawing blocks. (" ULINTPF "/" ULINTPF ")", UT_LIST_GET_LEN(withdraw), @@ -1997,17 +1981,8 @@ inline bool buf_pool_t::withdraw_blocks() << " Tried to relocate " << count2 << " pages (" << UT_LIST_GET_LEN(withdraw) << "/" << withdraw_target << ")"; - - if (++loop_count >= 10) { - /* give up for now. - retried after user threads paused. 
*/ - - ib::info() << "will retry to withdraw later"; - - /* need retry later */ - return(true); - } } + mutex_exit(&free_list_mutex); /* confirm withdrawn enough */ for (const chunk_t* chunk = chunks + n_chunks_new, @@ -2019,9 +1994,13 @@ inline bool buf_pool_t::withdraw_blocks() } } + mutex_enter(&free_list_mutex); + ib::info() << "withdrawn target: " << UT_LIST_GET_LEN(withdraw) << " blocks"; + mutex_exit(&free_list_mutex); + /* retry is not needed */ ++withdraw_clock_; @@ -2033,6 +2012,7 @@ static void buf_pool_resize_hash() { hash_table_t* new_hash_table; + ut_ad(mutex_own(&buf_pool.zip_hash_mutex)); ut_ad(buf_pool.page_hash_old == NULL); /* recreate page_hash */ @@ -2118,21 +2098,27 @@ inline void buf_pool_t::resize() ulint new_instance_size = srv_buf_pool_size >> srv_page_size_shift; - buf_resize_status("Resizing buffer pool from " ULINTPF " to " + buf_resize_status("Resizing buffer pool to " ULINTPF " (unit=" ULINTPF ").", - srv_buf_pool_old_size, srv_buf_pool_size, + srv_buf_pool_size, srv_buf_pool_chunk_unit); - mutex_enter(&mutex); + // No locking needed to read, same thread updated those ut_ad(curr_size == old_size); ut_ad(n_chunks_new == n_chunks); +#ifdef UNIV_DEBUG + mutex_enter(&free_list_mutex); ut_ad(UT_LIST_GET_LEN(withdraw) == 0); + mutex_exit(&free_list_mutex); + + mutex_enter(&flush_list_mutex); ut_ad(flush_rbt == NULL); + mutex_exit(&flush_list_mutex); +#endif n_chunks_new = (new_instance_size << srv_page_size_shift) / srv_buf_pool_chunk_unit; curr_size = n_chunks_new * chunks->size; - mutex_exit(&mutex); #ifdef BTR_CUR_HASH_ADAPT /* disable AHI if needed */ @@ -2267,8 +2253,18 @@ withdraw_retry: /* Indicate critical path */ resizing.store(true, std::memory_order_relaxed); + /* Acquire all buffer pool mutexes and hash table locks */ + /* TODO: while we certainly lock a lot here, it does not necessarily + buy us enough correctness. Exploits the fact that freed pages must + have no pointers to them from the buffer pool nor from any other thread + except for the freeing one to remove redundant locking. 
The same applies + to freshly allocated pages before any pointers to them are published.*/ mutex_enter(&mutex); hash_lock_x_all(page_hash); + mutex_enter(&zip_free_mutex); + mutex_enter(&free_list_mutex); + mutex_enter(&zip_hash_mutex); + mutex_enter(&flush_state_mutex); chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map()); /* add/delete chunks */ @@ -2399,14 +2395,14 @@ calc_buf_pool_size: read_ahead_area = ut_min( BUF_READ_AHEAD_PAGES, ut_2_power_up(curr_size / BUF_READ_AHEAD_PORTION)); + ulint old_pool_size = curr_pool_size; curr_pool_size = n_chunks * srv_buf_pool_chunk_unit; - srv_buf_pool_curr_size = curr_pool_size;/* FIXME: remove*/ old_size = curr_size; - innodb_set_buf_pool_size(buf_pool_size_align(srv_buf_pool_curr_size)); + innodb_set_buf_pool_size(buf_pool_size_align(curr_pool_size)); const bool new_size_too_diff - = srv_buf_pool_base_size > srv_buf_pool_size * 2 - || srv_buf_pool_base_size * 2 < srv_buf_pool_size; + = old_pool_size/2 > curr_pool_size + || old_pool_size < curr_pool_size/2; /* Normalize page_hash and zip_hash, if the new size is too different */ @@ -2416,8 +2412,12 @@ calc_buf_pool_size: ib::info() << "hash tables were resized"; } - hash_unlock_x_all(page_hash); mutex_exit(&mutex); + hash_unlock_x_all(page_hash); + mutex_exit(&zip_free_mutex); + mutex_exit(&free_list_mutex); + mutex_exit(&zip_hash_mutex); + mutex_exit(&flush_state_mutex); if (page_hash_old != NULL) { hash_table_free(page_hash_old); @@ -2430,8 +2430,6 @@ calc_buf_pool_size: /* Normalize other components, if the new size is too different */ if (!warning && new_size_too_diff) { - srv_buf_pool_base_size = srv_buf_pool_size; - buf_resize_status("Resizing also other hash tables."); /* normalize lock_sys */ @@ -2440,8 +2438,7 @@ calc_buf_pool_size: lock_sys.resize(srv_lock_table_size); /* normalize btr_search_sys */ - btr_search_sys_resize( - buf_pool_get_curr_size() / sizeof(void*) / 64); + btr_search_sys_resize(curr_pool_size / sizeof(void*) / 64); dict_sys.resize(); @@ -2455,13 +2452,8 @@ calc_buf_pool_size: /* normalize ibuf.max_size */ ibuf_max_size_update(srv_change_buffer_max_size); - if (srv_buf_pool_old_size != srv_buf_pool_size) { - - ib::info() << "Completed to resize buffer pool from " - << srv_buf_pool_old_size - << " to " << srv_buf_pool_size << "."; - srv_buf_pool_old_size = srv_buf_pool_size; - } + ib::info() << "Completed to resize buffer pool" + " to " << srv_buf_pool_size << "."; #ifdef BTR_CUR_HASH_ADAPT /* enable AHI if needed */ @@ -2494,19 +2486,9 @@ static void buf_resize_callback(void *) { DBUG_ENTER("buf_resize_callback"); ut_a(srv_shutdown_state == SRV_SHUTDOWN_NONE); - mutex_enter(&buf_pool.mutex); - const auto size= srv_buf_pool_size; - const bool work= srv_buf_pool_old_size != size; - mutex_exit(&buf_pool.mutex); - - if (work) - buf_pool.resize(); - else - { - std::ostringstream sout; - sout << "Size did not change: old size = new size = " << size; - buf_resize_status(sout.str().c_str()); - } + ut_a(srv_buf_pool_size_changing); + buf_pool.resize(); + srv_buf_pool_size_changing= false; DBUG_VOID_RETURN; } @@ -2526,18 +2508,17 @@ void buf_resize_shutdown() } -/********************************************************************//** -Relocate a buffer control block. Relocates the block on the LRU list +/** Relocate a buffer control block. Relocates the block on the LRU list and in buf_pool.page_hash. Does not relocate bpage->list. -The caller must take care of relocating bpage->list. */ +The caller must take care of relocating bpage->list. 
+@param[in,out] bpage control block being relocated, buf_page_get_state() + must be BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE +@param[in,out] dpage destination control block */ static void buf_relocate( -/*=========*/ - buf_page_t* bpage, /*!< in/out: control block being relocated; - buf_page_get_state(bpage) must be - BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */ - buf_page_t* dpage) /*!< in/out: destination control block */ + buf_page_t* bpage, + buf_page_t* dpage) { buf_page_t* b; @@ -2637,8 +2618,9 @@ bool buf_pool_watch_is_sentinel(const buf_page_t* bpage) } /** Add watch for the given page to be read in. Caller must have -appropriate hash_lock for the bpage. This function may release the -hash_lock and reacquire it. +appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race +condition with buf_LRU_free_page inserting the same page into the page hash. +This function may release the hash_lock and reacquire it. @param[in] page_id page id @param[in,out] hash_lock hash_lock currently latched @return NULL if watch set, block if the page is in the buffer pool */ @@ -2670,9 +2652,7 @@ page_found: } /* From this point this function becomes fairly heavy in terms - of latching. We acquire the buf_pool mutex as well as all the - hash_locks. buf_pool mutex is needed because any changes to - the page_hash must be covered by it and hash_locks are needed + of latching. We acquire all the hash_locks. They are needed because we don't want to read any stale information in buf_pool.watch[]. However, it is not in the critical code path as this function will be called only by the purge thread. */ @@ -2680,20 +2660,16 @@ page_found: /* To obey latching order first release the hash_lock. */ rw_lock_x_unlock(*hash_lock); - mutex_enter(&buf_pool.mutex); hash_lock_x_all(buf_pool.page_hash); /* We have to recheck that the page was not loaded or a watch set by some other purge thread. This is because of the small time window between when we release the - hash_lock to acquire buf_pool.mutex above. */ - + hash_lock to lock all the hash_locks. */ *hash_lock = buf_page_hash_lock_get(page_id); - bpage = buf_page_hash_get_low(page_id); - if (UNIV_LIKELY_NULL(bpage)) { - mutex_exit(&buf_pool.mutex); + if (bpage) { hash_unlock_x_all_but(buf_pool.page_hash, *hash_lock); goto page_found; } @@ -2714,11 +2690,6 @@ page_found: ut_ad(!bpage->in_page_hash); ut_ad(bpage->buf_fix_count == 0); - /* bpage is pointing to buf_pool.watch[], - which is protected by buf_pool.mutex. - Normally, buf_page_t objects are protected by - buf_block_t::mutex or buf_pool.zip_mutex or both. */ - bpage->state = BUF_BLOCK_ZIP_PAGE; bpage->id = page_id; bpage->buf_fix_count = 1; @@ -2727,7 +2698,6 @@ page_found: HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(), bpage); - mutex_exit(&buf_pool.mutex); /* Once the sentinel is in the page_hash we can safely release all locks except just the relevant hash_lock */ @@ -2755,27 +2725,19 @@ page_found: } /** Remove the sentinel block for the watch before replacing it with a -real block. buf_page_watch_clear() or buf_page_watch_occurred() will notice +real block. buf_pool_watch_unset() or buf_pool_watch_occurred() will notice that the block has been replaced with the real block. @param[in,out] watch sentinel for watch @return reference count, to be added to the replacement block */ -static -void -buf_pool_watch_remove(buf_page_t* watch) +static void buf_pool_watch_remove(buf_page_t *watch) { -#ifdef UNIV_DEBUG - /* We must also own the appropriate hash_bucket mutex. 
*/ - rw_lock_t* hash_lock = buf_page_hash_lock_get(watch->id); - ut_ad(rw_lock_own(hash_lock, RW_LOCK_X)); -#endif /* UNIV_DEBUG */ - - ut_ad(mutex_own(&buf_pool.mutex)); - - HASH_DELETE(buf_page_t, hash, buf_pool.page_hash, watch->id.fold(), - watch); - ut_d(watch->in_page_hash = FALSE); - watch->buf_fix_count = 0; - watch->state = BUF_BLOCK_POOL_WATCH; + ut_ad(rw_lock_own(buf_page_hash_lock_get(watch->id), RW_LOCK_X)); + ut_ad(watch->state == BUF_BLOCK_ZIP_PAGE); + ut_ad(watch->in_page_hash); + HASH_DELETE(buf_page_t, hash, buf_pool.page_hash, watch->id.fold(), watch); + ut_d(watch->in_page_hash= FALSE); + watch->buf_fix_count= 0; + watch->state= BUF_BLOCK_POOL_WATCH; } /** Stop watching if the page has been read in. @@ -2783,27 +2745,17 @@ buf_pool_watch_set(same_page_id) must have returned NULL before. @param[in] page_id page id */ void buf_pool_watch_unset(const page_id_t page_id) { - buf_page_t* bpage; - /* We only need to have buf_pool.mutex in case where we end - up calling buf_pool_watch_remove but to obey latching order - we acquire it here before acquiring hash_lock. This should - not cause too much grief as this function is only ever - called from the purge thread. */ - mutex_enter(&buf_pool.mutex); - - rw_lock_t* hash_lock = buf_page_hash_lock_get(page_id); - rw_lock_x_lock(hash_lock); + rw_lock_t *hash_lock= buf_page_hash_lock_get(page_id); + rw_lock_x_lock(hash_lock); - /* The page must exist because buf_pool_watch_set() - increments buf_fix_count. */ - bpage = buf_page_hash_get_low(page_id); + /* The page must exist because buf_pool_watch_set() + increments buf_fix_count. */ + buf_page_t *bpage= buf_page_hash_get_low(page_id); - if (bpage->unfix() == 0 && buf_pool_watch_is_sentinel(bpage)) { - buf_pool_watch_remove(bpage); - } + if (bpage->unfix() == 0 && buf_pool_watch_is_sentinel(bpage)) + buf_pool_watch_remove(bpage); - mutex_exit(&buf_pool.mutex); - rw_lock_x_unlock(hash_lock); + rw_lock_x_unlock(hash_lock); } /** Check if the page has been read in. @@ -2832,8 +2784,7 @@ bool buf_pool_watch_occurred(const page_id_t page_id) return(ret); } -/********************************************************************//** -Moves a page to the start of the buffer pool LRU list. This high-level +/** Moves a page to the start of the buffer pool LRU list. This high-level function can be used to prevent an important page from slipping out of the buffer pool. @param[in,out] bpage buffer block of a file page */ @@ -2914,18 +2865,27 @@ static void buf_block_try_discard_uncompressed(const page_id_t page_id) { buf_page_t* bpage; - /* Since we need to acquire buf_pool mutex to discard - the uncompressed frame and because page_hash mutex resides - below buf_pool mutex in sync ordering therefore we must - first release the page_hash mutex. This means that the - block in question can move out of page_hash. Therefore - we need to check again if the block is still in page_hash. */ + /* Since we need to acquire buf_pool.mutex to discard + the uncompressed frame and because page_hash mutex resides below + buf_pool.mutex in sync ordering therefore we must first + release the page_hash mutex. This means that the block in question + can move out of page_hash. Therefore we need to check again if the + block is still in page_hash. 
*/ mutex_enter(&buf_pool.mutex); bpage = buf_page_hash_get(page_id); if (bpage) { - buf_LRU_free_page(bpage, false); + + BPageMutex* block_mutex = buf_page_get_mutex(bpage); + + mutex_enter(block_mutex); + + if (buf_LRU_free_page(bpage, false)) { + + return; + } + mutex_exit(block_mutex); } mutex_exit(&buf_pool.mutex); @@ -3212,22 +3172,12 @@ buf_wait_for_read( access the block (and check for IO state) after the block has been added to the page hashtable. */ - if (buf_block_get_io_fix(block) == BUF_IO_READ) { + if (buf_block_get_io_fix_unlocked(block) == BUF_IO_READ) { /* Wait until the read operation completes */ - - BPageMutex* mutex = buf_page_get_mutex(&block->page); - for (;;) { - buf_io_fix io_fix; - - mutex_enter(mutex); - - io_fix = buf_block_get_io_fix(block); - - mutex_exit(mutex); - - if (io_fix == BUF_IO_READ) { + if (buf_block_get_io_fix_unlocked(block) + == BUF_IO_READ) { /* Wait by temporaly s-latch */ rw_lock_s_lock(&block->lock); rw_lock_s_unlock(&block->lock); @@ -3271,6 +3221,7 @@ buf_page_get_gen( unsigned access_time; rw_lock_t* hash_lock; buf_block_t* fix_block; + BPageMutex* fix_mutex = NULL; ulint retries = 0; ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL)); @@ -3362,8 +3313,7 @@ loop: if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { rw_lock_x_lock(hash_lock); - /* If not own buf_pool_mutex, - page_hash can be changed. */ + /* page_hash can be changed. */ hash_lock = buf_page_hash_lock_x_confirm( hash_lock, page_id); @@ -3385,7 +3335,7 @@ loop: buf_flush_page() for the flush thread counterpart. */ - BPageMutex* fix_mutex + fix_mutex = buf_page_get_mutex( &fix_block->page); mutex_enter(fix_mutex); @@ -3501,7 +3451,7 @@ loop: for synchorization between user thread and flush thread, instead of block->lock. See buf_flush_page() for the flush thread counterpart. */ - BPageMutex* fix_mutex = buf_page_get_mutex( + fix_mutex = buf_page_get_mutex( &fix_block->page); mutex_enter(fix_mutex); fix_block->fix(); @@ -3522,11 +3472,8 @@ got_block: case BUF_PEEK_IF_IN_POOL: case BUF_EVICT_IF_IN_POOL: buf_page_t* fix_page = &fix_block->page; - BPageMutex* fix_mutex = buf_page_get_mutex(fix_page); - mutex_enter(fix_mutex); const bool must_read - = (buf_page_get_io_fix(fix_page) == BUF_IO_READ); - mutex_exit(fix_mutex); + = (buf_page_get_io_fix_unlocked(fix_page) == BUF_IO_READ); if (must_read) { /* The page is being read to buffer pool, @@ -3541,8 +3488,9 @@ got_block: switch (UNIV_EXPECT(buf_block_get_state(fix_block), BUF_BLOCK_FILE_PAGE)) { case BUF_BLOCK_FILE_PAGE: + ut_ad(fix_mutex != &buf_pool.zip_mutex); if (fsp_is_system_temporary(page_id.space()) - && buf_block_get_io_fix(block) != BUF_IO_NONE) { + && buf_block_get_io_fix_unlocked(block) != BUF_IO_NONE) { /* This suggests that the page is being flushed. Avoid returning reference to this page. Instead wait for the flush action to complete. */ @@ -3555,13 +3503,19 @@ got_block: evict_from_pool: ut_ad(!fix_block->page.oldest_modification); mutex_enter(&buf_pool.mutex); + fix_mutex + = buf_page_get_mutex( + &fix_block->page); + mutex_enter(fix_mutex); fix_block->unfix(); if (!buf_LRU_free_page(&fix_block->page, true)) { ut_ad(0); } + // buf_LRU_free_page frees the mutexes we locked. 
+ ut_ad(!mutex_own(fix_mutex)); + ut_ad(!mutex_own(&buf_pool.mutex)); - mutex_exit(&buf_pool.mutex); return(NULL); } break; @@ -3586,10 +3540,13 @@ evict_from_pool: } buf_page_t* bpage = &block->page; + /* MDEV-15053-TODO innodb.table_flags-16k fails on it + ut_ad(fix_mutex == &buf_pool.zip_mutex); */ + ut_ad(fix_mutex == &buf_pool.zip_mutex || !fix_mutex); /* Note: We have already buffer fixed this block. */ if (bpage->buf_fix_count > 1 - || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) { /* This condition often occurs when the buffer is not buffer-fixed, but I/O-fixed by @@ -3611,8 +3568,6 @@ evict_from_pool: mutex_enter(&buf_pool.mutex); - hash_lock = buf_page_hash_lock_get(page_id); - rw_lock_x_lock(hash_lock); /* Buffer-fixing prevents the page_hash from changing. */ @@ -3635,10 +3590,10 @@ evict_from_pool: This should be extremely unlikely, for example, if buf_page_get_zip() was invoked. */ - buf_LRU_block_free_non_file_page(block); mutex_exit(&buf_pool.mutex); rw_lock_x_unlock(hash_lock); buf_page_mutex_exit(block); + buf_LRU_block_free_non_file_page(block); /* Try again */ goto loop; @@ -3681,15 +3636,15 @@ evict_from_pool: /* Insert at the front of unzip_LRU list */ buf_unzip_LRU_add_block(block, FALSE); + mutex_exit(&buf_pool.mutex); + buf_block_set_io_fix(block, BUF_IO_READ); rw_lock_x_lock_inline(&block->lock, 0, file, line); UNIV_MEM_INVALID(bpage, sizeof *bpage); rw_lock_x_unlock(hash_lock); - buf_pool.n_pend_unzip++; mutex_exit(&buf_pool.zip_mutex); - mutex_exit(&buf_pool.mutex); access_time = buf_page_is_accessed(&block->page); @@ -3703,16 +3658,14 @@ evict_from_pool: buf_page_free_descriptor(bpage); /* Decompress the page while not holding - buf_pool.mutex or block->mutex. */ + any buf_pool or block->mutex. */ if (!buf_zip_decompress(block, TRUE)) { - mutex_enter(&buf_pool.mutex); buf_page_mutex_enter(fix_block); buf_block_set_io_fix(fix_block, BUF_IO_NONE); buf_page_mutex_exit(fix_block); --buf_pool.n_pend_unzip; - mutex_exit(&buf_pool.mutex); fix_block->unfix(); rw_lock_x_unlock(&fix_block->lock); @@ -3722,17 +3675,13 @@ evict_from_pool: return NULL; } - mutex_enter(&buf_pool.mutex); - buf_page_mutex_enter(fix_block); buf_block_set_io_fix(fix_block, BUF_IO_NONE); buf_page_mutex_exit(fix_block); - --buf_pool.n_pend_unzip; - - mutex_exit(&buf_pool.mutex); + buf_pool.n_pend_unzip++; rw_lock_x_unlock(&block->lock); @@ -3764,16 +3713,20 @@ evict_from_pool: relocated or enter or exit the buf_pool while we are holding the buf_pool.mutex. */ + fix_mutex = buf_page_get_mutex(&fix_block->page); + mutex_enter(fix_mutex); + if (buf_LRU_free_page(&fix_block->page, true)) { - mutex_exit(&buf_pool.mutex); + if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { + /* Hold LRU list mutex, see comment + in buf_pool_watch_set(). */ + mutex_enter(&buf_pool.mutex); + } /* page_hash can be changed. */ hash_lock = buf_page_hash_lock_get(page_id); rw_lock_x_lock(hash_lock); - - /* If not own buf_pool_mutex, - page_hash can be changed. */ hash_lock = buf_page_hash_lock_x_confirm( hash_lock, page_id); @@ -3783,6 +3736,7 @@ evict_from_pool: buffer pool in the first place. 
*/ block = (buf_block_t*) buf_pool_watch_set( page_id, &hash_lock); + mutex_exit(&buf_pool.mutex); } else { block = (buf_block_t*) buf_page_hash_get_low( page_id); @@ -3793,7 +3747,7 @@ evict_from_pool: if (block != NULL) { /* Either the page has been read in or a watch was set on that in the window - where we released the buf_pool::mutex + where we released the buf_pool.mutex and before we acquire the hash_lock above. Try again. */ guess = block; @@ -3804,21 +3758,19 @@ evict_from_pool: return(NULL); } - buf_page_mutex_enter(fix_block); - if (buf_flush_page_try(fix_block)) { guess = fix_block; goto loop; } + mutex_exit(&buf_pool.mutex); + buf_page_mutex_exit(fix_block); fix_block->fix(); /* Failed to evict the page; change it directly */ - - mutex_exit(&buf_pool.mutex); } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ @@ -4081,16 +4033,16 @@ buf_page_try_get_func( ut_ad(!buf_pool_watch_is_sentinel(&block->page)); - buf_page_mutex_enter(block); + buf_block_buf_fix_inc(block, file, line); + rw_lock_s_unlock(hash_lock); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + buf_page_mutex_enter(block); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_a(page_id == block->page.id); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - - buf_block_buf_fix_inc(block, file, line); buf_page_mutex_exit(block); +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ mtr_memo_type_t fix_type = MTR_MEMO_PAGE_S_FIX; success = rw_lock_s_lock_nowait(&block->lock, file, line); @@ -4148,7 +4100,8 @@ buf_page_init_low( HASH_INVALIDATE(bpage, hash); } -/** Inits a page to the buffer buf_pool. +/** Inits a page to the buffer buf_pool. The block pointer must be private to +the calling thread at the start of this function. @param[in] page_id page id @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in,out] block block to init */ @@ -4157,8 +4110,7 @@ static void buf_page_init(const page_id_t page_id, ulint zip_size, { buf_page_t* hash_page; - ut_ad(mutex_own(&buf_pool.mutex)); - ut_ad(buf_page_mutex_own(block)); + ut_ad(!mutex_own(buf_page_get_mutex(&block->page))); ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); ut_ad(rw_lock_own(buf_page_hash_lock_get(page_id), RW_LOCK_X)); @@ -4202,8 +4154,6 @@ static void buf_page_init(const page_id_t page_id, ulint zip_size, << " already found in the hash table: " << hash_page << ", " << block; - ut_d(buf_page_mutex_exit(block)); - ut_d(mutex_exit(&buf_pool.mutex)); ut_d(buf_pool.print()); ut_d(buf_LRU_print()); ut_d(buf_LRU_validate()); @@ -4247,12 +4197,9 @@ buf_page_init_for_read( bool unzip) { buf_block_t* block; - buf_page_t* bpage = NULL; - buf_page_t* watch_page; rw_lock_t* hash_lock; mtr_t mtr; - bool lru = false; - void* data; + void* data = NULL; *err = DB_SUCCESS; @@ -4281,20 +4228,41 @@ buf_page_init_for_read( ut_ad(block); } + buf_page_t* bpage = NULL; + if (block == NULL) { + bpage = buf_page_alloc_descriptor(); + } + + if (!block || zip_size) { + data = buf_buddy_alloc(zip_size); + } + mutex_enter(&buf_pool.mutex); hash_lock = buf_page_hash_lock_get(page_id); rw_lock_x_lock(hash_lock); + buf_page_t* watch_page; + watch_page = buf_page_hash_get_low(page_id); if (watch_page && !buf_pool_watch_is_sentinel(watch_page)) { /* The page is already in the buffer pool. 
*/ watch_page = NULL; + + mutex_exit(&buf_pool.mutex); + rw_lock_x_unlock(hash_lock); - if (block) { - buf_page_mutex_enter(block); + + if (bpage != NULL) { + buf_page_free_descriptor(bpage); + } + + if (data != NULL) { + buf_buddy_free(data, zip_size); + } + + if (block != NULL) { buf_LRU_block_free_non_file_page(block); - buf_page_mutex_exit(block); } bpage = NULL; @@ -4302,46 +4270,23 @@ buf_page_init_for_read( } if (block) { + ut_ad(!bpage); bpage = &block->page; - buf_page_mutex_enter(block); - buf_page_init(page_id, zip_size, block); + buf_page_mutex_enter(block); + /* Note: We are using the hash_lock for protection. This is safe because no other thread can lookup the block from the page hashtable yet. */ buf_page_set_io_fix(bpage, BUF_IO_READ); - rw_lock_x_unlock(hash_lock); - /* The block must be put to the LRU list, to the old blocks */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); - /* We set a pass-type x-lock on the frame because then - the same thread which called for the read operation - (and is running now at this point of code) can wait - for the read to complete by waiting for the x-lock on - the frame; if the x-lock were recursive, the same - thread would illegally get the x-lock before the page - read is completed. The x-lock is cleared by the - io-handler thread. */ - - rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); - if (zip_size) { - /* buf_pool.mutex may be released and - reacquired by buf_buddy_alloc(). Thus, we - must release block->mutex in order not to - break the latching order in the reacquisition - of buf_pool.mutex. We also must defer this - operation until after the block descriptor has - been added to buf_pool.LRU and - buf_pool.page_hash. */ - buf_page_mutex_exit(block); - data = buf_buddy_alloc(zip_size, &lru); - buf_page_mutex_enter(block); block->page.zip.data = (page_zip_t*) data; /* To maintain the invariant @@ -4353,41 +4298,27 @@ buf_page_init_for_read( buf_unzip_LRU_add_block(block, TRUE); } - buf_page_mutex_exit(block); - } else { - rw_lock_x_unlock(hash_lock); - - /* The compressed page must be allocated before the - control block (bpage), in order to avoid the - invocation of buf_buddy_relocate_block() on - uninitialized data. */ - data = buf_buddy_alloc(zip_size, &lru); - - rw_lock_x_lock(hash_lock); - - /* If buf_buddy_alloc() allocated storage from the LRU list, - it released and reacquired buf_pool.mutex. Thus, we must - check the page_hash again, as it may have been modified. */ - if (UNIV_UNLIKELY(lru)) { + mutex_exit(&buf_pool.mutex); watch_page = buf_page_hash_get_low(page_id); + /* We set a pass-type x-lock on the frame because then + the same thread which called for the read operation + (and is running now at this point of code) can wait + for the read to complete by waiting for the x-lock on + the frame; if the x-lock were recursive, the same + thread would illegally get the x-lock before the page + read is completed. The x-lock is cleared by the + io-handler thread. */ - if (UNIV_UNLIKELY(watch_page - && !buf_pool_watch_is_sentinel(watch_page))) { - - /* The block was added by some other thread. 
*/ - rw_lock_x_unlock(hash_lock); - watch_page = NULL; - buf_buddy_free(data, zip_size); + rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); - bpage = NULL; - goto func_exit; - } - } + rw_lock_x_unlock(hash_lock); - bpage = buf_page_alloc_descriptor(); + buf_page_mutex_exit(block); + } else { page_zip_des_init(&bpage->zip); page_zip_set_size(&bpage->zip, zip_size); + ut_ad(data); bpage->zip.data = (page_zip_t*) data; mutex_enter(&buf_pool.zip_mutex); @@ -4441,7 +4372,6 @@ buf_page_init_for_read( buf_pool.n_pend_reads++; func_exit: - mutex_exit(&buf_pool.mutex); if (mode == BUF_READ_IBUF_PAGES_ONLY) { @@ -4518,10 +4448,10 @@ buf_page_create( block = free_block; - buf_page_mutex_enter(block); - buf_page_init(page_id, zip_size, block); + buf_page_mutex_enter(block); + rw_lock_x_unlock(hash_lock); /* The block must be put to the LRU list */ @@ -4538,14 +4468,10 @@ buf_page_create( buf_page_set_io_fix(&block->page, BUF_IO_READ); rw_lock_x_lock(&block->lock); + mutex_exit(&buf_pool.mutex); buf_page_mutex_exit(block); - /* buf_pool.mutex may be released and reacquired by - buf_buddy_alloc(). Thus, we must release block->mutex - in order not to break the latching order in - the reacquisition of buf_pool.mutex. We also must - defer this operation until after the block descriptor - has been added to buf_pool.LRU and buf_pool.page_hash. */ block->page.zip.data = buf_buddy_alloc(zip_size); + mutex_enter(&buf_pool.mutex); buf_page_mutex_enter(block); /* To maintain the invariant @@ -4733,9 +4659,13 @@ buf_corrupt_page_release(buf_page_t* bpage, const fil_space_t* space) const ibool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); page_id_t old_page_id = bpage->id; + rw_lock_t* hash_lock = buf_page_hash_lock_get(bpage->id); /* First unfix and release lock on the bpage */ mutex_enter(&buf_pool.mutex); + + rw_lock_x_lock(hash_lock); + mutex_enter(buf_page_get_mutex(bpage)); ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ); ut_ad(bpage->id.space() == space->id); @@ -4753,19 +4683,20 @@ buf_corrupt_page_release(buf_page_t* bpage, const fil_space_t* space) BUF_IO_READ); } - mutex_exit(buf_page_get_mutex(bpage)); - if (!srv_force_recovery) { buf_mark_space_corrupt(bpage, *space); } - /* After this point bpage can't be referenced. */ + /* The hash lock and block mutex will be released during the "free" */ buf_LRU_free_one_page(bpage, old_page_id); - ut_ad(buf_pool.n_pend_reads > 0); - buf_pool.n_pend_reads--; + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) + && !rw_lock_own(hash_lock, RW_LOCK_S)); mutex_exit(&buf_pool.mutex); + + ut_ad(buf_pool.n_pend_reads > 0); + buf_pool.n_pend_reads--; } /** Check if the encrypted page is corrupted for the full crc32 format. @@ -4877,6 +4808,8 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict) enum buf_io_fix io_type; const bool uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + bool have_LRU_mutex = false; + ut_a(buf_page_in_file(bpage)); /* We do not need protect io_fix here by mutex to read @@ -4885,7 +4818,7 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict) ensures that this is the only thread that handles the i/o for this block. 
*/ - io_type = buf_page_get_io_fix(bpage); + io_type = buf_page_get_io_fix_unlocked(bpage); ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE); ut_ad(!!bpage->zip.ssize == (bpage->zip.data != NULL)); ut_ad(uncompressed || bpage->zip.data); @@ -5071,19 +5004,40 @@ release_page: } } - BPageMutex* block_mutex = buf_page_get_mutex(bpage); + mutex_enter(&buf_pool.mutex); - mutex_enter(block_mutex); + + BPageMutex* page_mutex = buf_page_get_mutex(bpage); + mutex_enter(page_mutex); + + if (io_type == BUF_IO_WRITE + && ( +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG + /* to keep consistency at buf_LRU_insert_zip_clean() */ + buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY || +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU || + buf_page_get_flush_type(bpage) == BUF_FLUSH_SINGLE_PAGE)) { + + have_LRU_mutex = true; /* optimistic */ + } else { + mutex_exit(&buf_pool.mutex); + } + /* Because this thread which does the unlocking is not the same that did the locking, we use a pass value != 0 in unlock, which simply removes the newest lock debug record, without checking the thread id. */ - buf_page_set_io_fix(bpage, BUF_IO_NONE); buf_page_monitor(bpage, io_type); if (io_type == BUF_IO_READ) { + + ut_ad(!have_LRU_mutex); + + buf_page_set_io_fix(bpage, BUF_IO_NONE); + /* NOTE that the call to ibuf may have moved the ownership of the x-latch to this OS thread: do not let this confuse you in debugging! */ @@ -5097,7 +5051,7 @@ release_page: BUF_IO_READ); } - mutex_exit(block_mutex); + mutex_exit(page_mutex); } else { /* Write means a flush operation: call the completion routine in the flush system */ @@ -5119,19 +5073,22 @@ release_page: by the caller explicitly. */ if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) { evict = true; + ut_ad(have_LRU_mutex); } - mutex_exit(block_mutex); - - if (evict) { - buf_LRU_free_page(bpage, true); + if (evict && buf_LRU_free_page(bpage, true)) { + have_LRU_mutex = false; + } else { + mutex_exit(buf_page_get_mutex(bpage)); + } + if (have_LRU_mutex) { + mutex_exit(&buf_pool.mutex); } } DBUG_PRINT("ib_buf", ("%s page %u:%u", io_type == BUF_IO_READ ? "read" : "wrote", bpage->id.space(), bpage->id.page_no())); - mutex_exit(&buf_pool.mutex); return DB_SUCCESS; } @@ -5161,7 +5118,9 @@ void buf_refresh_io_stats() All pages must be in a replaceable state (not modified or latched). 
*/ void buf_pool_invalidate() { - mutex_enter(&buf_pool.mutex); + ut_ad(!mutex_own(&buf_pool.mutex)); + + mutex_enter(&buf_pool.flush_state_mutex); for (unsigned i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { @@ -5179,18 +5138,19 @@ void buf_pool_invalidate() if (buf_pool.n_flush[i] > 0) { buf_flush_t type = buf_flush_t(i); - mutex_exit(&buf_pool.mutex); + mutex_exit(&buf_pool.flush_state_mutex); buf_flush_wait_batch_end(type); - mutex_enter(&buf_pool.mutex); + mutex_enter(&buf_pool.flush_state_mutex); } } - ut_d(mutex_exit(&buf_pool.mutex)); + mutex_exit(&buf_pool.flush_state_mutex); ut_d(buf_pool.assert_all_freed()); - ut_d(mutex_enter(&buf_pool.mutex)); while (buf_LRU_scan_and_free_block(true)); + mutex_enter(&buf_pool.mutex); + ut_ad(UT_LIST_GET_LEN(buf_pool.LRU) == 0); ut_ad(UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0); @@ -5198,9 +5158,10 @@ void buf_pool_invalidate() buf_pool.LRU_old = NULL; buf_pool.LRU_old_len = 0; + mutex_exit(&buf_pool.mutex); + memset(&buf_pool.stat, 0x00, sizeof(buf_pool.stat)); buf_refresh_io_stats(); - mutex_exit(&buf_pool.mutex); } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG @@ -5208,8 +5169,6 @@ void buf_pool_invalidate() void buf_pool_t::validate() { buf_page_t* b; - buf_pool_t::chunk_t* chunk; - ulint i; ulint n_lru_flush = 0; ulint n_page_flush = 0; ulint n_list_flush = 0; @@ -5218,22 +5177,23 @@ void buf_pool_t::validate() ulint n_free = 0; ulint n_zip = 0; - mutex_enter(&buf_pool.mutex); - hash_lock_x_all(buf_pool.page_hash); + mutex_enter(&mutex); + hash_lock_x_all(page_hash); + mutex_enter(&zip_mutex); + mutex_enter(&free_list_mutex); + mutex_enter(&flush_state_mutex); - chunk = buf_pool.chunks; + chunk_t* chunk = chunks; /* Check the uncompressed blocks. */ - for (i = buf_pool.n_chunks; i--; chunk++) { + for (ulint i = n_chunks; i--; chunk++) { ulint j; buf_block_t* block = chunk->blocks; for (j = chunk->size; j--; block++) { - buf_page_mutex_enter(block); - switch (buf_block_get_state(block)) { case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: @@ -5247,7 +5207,7 @@ void buf_pool_t::validate() ut_ad(buf_page_hash_get_low(block->page.id) == &block->page); - switch (buf_page_get_io_fix(&block->page)) { + switch (buf_page_get_io_fix_unlocked(&block->page)) { case BUF_IO_NONE: break; @@ -5255,20 +5215,8 @@ void buf_pool_t::validate() switch (buf_page_get_flush_type( &block->page)) { case BUF_FLUSH_LRU: - n_lru_flush++; - goto assert_s_latched; case BUF_FLUSH_SINGLE_PAGE: - n_page_flush++; -assert_s_latched: - ut_a(rw_lock_is_locked( - &block->lock, - RW_LOCK_S) - || rw_lock_is_locked( - &block->lock, - RW_LOCK_SX)); - break; case BUF_FLUSH_LIST: - n_list_flush++; break; default: ut_error; @@ -5295,16 +5243,12 @@ assert_s_latched: /* do nothing */ break; } - - buf_page_mutex_exit(block); } } - mutex_enter(&buf_pool.zip_mutex); - /* Check clean compressed-only blocks. */ - for (b = UT_LIST_GET_FIRST(buf_pool.zip_clean); b; + for (b = UT_LIST_GET_FIRST(zip_clean); b; b = UT_LIST_GET_NEXT(list, b)) { ut_ad(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE); switch (buf_page_get_io_fix(b)) { @@ -5324,7 +5268,7 @@ assert_s_latched: } /* It is OK to read oldest_modification here because - we have acquired buf_pool.zip_mutex above which acts + we have acquired zip_mutex above which acts as the 'block->mutex' for these bpages. */ ut_ad(!b->oldest_modification); ut_ad(buf_page_hash_get_low(b->id) == b); @@ -5334,8 +5278,8 @@ assert_s_latched: /* Check dirty blocks. 
*/ - mutex_enter(&buf_pool.flush_list_mutex); - for (b = UT_LIST_GET_FIRST(buf_pool.flush_list); b; + mutex_enter(&flush_list_mutex); + for (b = UT_LIST_GET_FIRST(flush_list); b; b = UT_LIST_GET_NEXT(list, b)) { ut_ad(b->in_flush_list); ut_ad(b->oldest_modification); @@ -5345,7 +5289,9 @@ assert_s_latched: case BUF_BLOCK_ZIP_DIRTY: n_lru++; n_zip++; - switch (buf_page_get_io_fix(b)) { + /* fall through */ + case BUF_BLOCK_FILE_PAGE: + switch (buf_page_get_io_fix_unlocked(b)) { case BUF_IO_NONE: case BUF_IO_READ: case BUF_IO_PIN: @@ -5367,51 +5313,50 @@ assert_s_latched: break; } break; - case BUF_BLOCK_FILE_PAGE: - /* uncompressed page */ + case BUF_BLOCK_REMOVE_HASH: + /* We do not hold buf_pool.mutex here. */ break; case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: ut_error; break; } ut_ad(buf_page_hash_get_low(b->id) == b); } - ut_ad(UT_LIST_GET_LEN(buf_pool.flush_list) == n_flush); - - hash_unlock_x_all(buf_pool.page_hash); - mutex_exit(&buf_pool.flush_list_mutex); + ut_ad(UT_LIST_GET_LEN(flush_list) == n_flush); - mutex_exit(&buf_pool.zip_mutex); + hash_unlock_x_all(page_hash); + mutex_exit(&flush_list_mutex); + mutex_exit(&zip_mutex); - if (buf_pool.curr_size == buf_pool.old_size - && n_lru + n_free > buf_pool.curr_size + n_zip) { + if (curr_size == old_size + && n_lru + n_free > curr_size + n_zip) { ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free - << ", pool " << buf_pool.curr_size + << ", pool " << curr_size << " zip " << n_zip << ". Aborting..."; } - ut_ad(UT_LIST_GET_LEN(buf_pool.LRU) == n_lru); + ut_ad(UT_LIST_GET_LEN(LRU) == n_lru); - if (buf_pool.curr_size == buf_pool.old_size - && UT_LIST_GET_LEN(buf_pool.free) != n_free) { + mutex_exit(&mutex); + + if (curr_size == old_size + && UT_LIST_GET_LEN(free) > n_free) { ib::fatal() << "Free list len " - << UT_LIST_GET_LEN(buf_pool.free) + << UT_LIST_GET_LEN(free) << ", free blocks " << n_free << ". Aborting..."; } - ut_ad(buf_pool.n_flush[BUF_FLUSH_LIST] == n_list_flush); - ut_ad(buf_pool.n_flush[BUF_FLUSH_LRU] == n_lru_flush); - ut_ad(buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush); + mutex_exit(&free_list_mutex); - mutex_exit(&buf_pool.mutex); + ut_ad(this->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush); + mutex_exit(&flush_state_mutex); ut_d(buf_LRU_validate()); ut_d(buf_flush_validate()); @@ -5429,7 +5374,7 @@ void buf_pool_t::print() ulint j; index_id_t id; ulint n_found; - buf_pool_t::chunk_t* chunk; + chunk_t* chunk; dict_index_t* index; size = curr_size; @@ -5549,18 +5494,15 @@ ulint buf_get_latched_pages_number() continue; } - buf_page_mutex_enter(block); - if (block->page.buf_fix_count != 0 - || buf_page_get_io_fix(&block->page) + || buf_page_get_io_fix_unlocked(&block->page) != BUF_IO_NONE) { fixed_pages_number++; } - - buf_page_mutex_exit(block); } } + mutex_exit(&buf_pool.mutex); mutex_enter(&buf_pool.zip_mutex); /* Traverse the lists of clean and dirty compressed-only blocks. 
*/ @@ -5604,7 +5546,6 @@ ulint buf_get_latched_pages_number() mutex_exit(&buf_pool.flush_list_mutex); mutex_exit(&buf_pool.zip_mutex); - mutex_exit(&buf_pool.mutex); return(fixed_pages_number); } @@ -5618,6 +5559,8 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info) double time_elapsed; mutex_enter(&buf_pool.mutex); + mutex_enter(&buf_pool.free_list_mutex); + mutex_enter(&buf_pool.flush_state_mutex); mutex_enter(&buf_pool.flush_list_mutex); pool_info->pool_size = buf_pool.curr_size; @@ -5647,6 +5590,9 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info) + buf_pool.init_flush[BUF_FLUSH_SINGLE_PAGE]); mutex_exit(&buf_pool.flush_list_mutex); + mutex_exit(&buf_pool.flush_state_mutex); + mutex_exit(&buf_pool.free_list_mutex); + mutex_exit(&buf_pool.mutex); current_time = time(NULL); time_elapsed = 0.001 + difftime(current_time, @@ -5737,7 +5683,6 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info) pool_info->unzip_cur = buf_LRU_stat_cur.unzip; buf_refresh_io_stats(); - mutex_exit(&buf_pool.mutex); } /*********************************************************************//** @@ -5872,12 +5817,12 @@ ulint buf_pool_check_no_pending_io() { /* FIXME: use atomics, no mutex */ ulint pending_io = buf_pool.n_pend_reads; - mutex_enter(&buf_pool.mutex); + mutex_enter(&buf_pool.flush_state_mutex); pending_io += + buf_pool.n_flush[BUF_FLUSH_LRU] + buf_pool.n_flush[BUF_FLUSH_SINGLE_PAGE] + buf_pool.n_flush[BUF_FLUSH_LIST]; - mutex_exit(&buf_pool.mutex); + mutex_exit(&buf_pool.flush_state_mutex); return(pending_io); } @@ -5915,5 +5860,9 @@ buf_page_get_trim_length( ulint write_length) { return bpage->physical_size() - write_length; + ut_ad(mutex_own(&buf_pool.mutex)); + ut_ad(mutex_own(&buf_pool.free_list_mutex)); + ut_ad(mutex_own(&buf_pool.flush_state_mutex)); + ut_ad(mutex_own(&buf_pool.flush_list_mutex)); } #endif /* !UNIV_INNOCHECKSUM */ |