Diffstat (limited to 'storage/innobase/include')
-rw-r--r--   storage/innobase/include/buf0buf.h    | 193
-rw-r--r--   storage/innobase/include/buf0lru.h    |   2
-rw-r--r--   storage/innobase/include/buf0types.h  |  33
-rw-r--r--   storage/innobase/include/hash0hash.h  |   4
-rw-r--r--   storage/innobase/include/rw_lock.h    | 106
-rw-r--r--   storage/innobase/include/srv0srv.h    |   2
-rw-r--r--   storage/innobase/include/sync0rw.ic   |  18
-rw-r--r--   storage/innobase/include/sync0sync.h  |   1
8 files changed, 262 insertions, 97 deletions
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 96c96113e85..4e0b25c52dd 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -70,9 +70,6 @@ struct fil_addr_t;
 #define BUF_EVICT_IF_IN_POOL	20	/*!< evict a clean block if found */
 /* @} */
 
-#define MAX_PAGE_HASH_LOCKS	1024	/*!< The maximum number of
-					page_hash locks */
-
 # ifdef UNIV_DEBUG
 extern my_bool	buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
 					buffer pool is not allowed. */
@@ -1605,47 +1602,9 @@ public:
   }
 
   /** Get the page_hash latch for a page */
-  rw_lock_t *hash_lock_get(const page_id_t id) const
-  {
-    return hash_lock_get_low(id.fold());
-  }
-  /** Get a page_hash latch. */
-  rw_lock_t *hash_lock_get_low(ulint fold) const
-  {
-    return page_hash_latches +
-      ut_2pow_remainder(page_hash.calc_hash(fold),
-                        ulint{srv_n_page_hash_locks});
-  }
-private:
-  /** Get a page_hash latch. */
-  rw_lock_t *hash_lock_get_low(ulint fold, ulint n_cells) const
-  {
-    return page_hash_latches +
-      ut_2pow_remainder(ut_hash_ulint(fold, n_cells),
-                        ulint{srv_n_page_hash_locks});
-  }
-public:
-
-  /** Acquire a page_hash bucket latch, tolerating concurrent resize()
-  @tparam exclusive  whether the latch is to be acquired exclusively
-  @param fold  hash bucket key */
-  template<bool exclusive> rw_lock_t *page_hash_lock(ulint fold)
+  page_hash_latch *hash_lock_get(const page_id_t id) const
   {
-    for (;;)
-    {
-      auto n_cells= page_hash.n_cells;
-      rw_lock_t *latch= hash_lock_get_low(fold, n_cells);
-      if (exclusive)
-        rw_lock_x_lock(latch);
-      else
-        rw_lock_s_lock(latch);
-      if (UNIV_LIKELY(n_cells == page_hash.n_cells))
-        return latch;
-      if (exclusive)
-        rw_lock_x_unlock(latch);
-      else
-        rw_lock_s_unlock(latch);
-    }
+    return page_hash.lock_get(id.fold());
   }
 
   /** Look up a block descriptor.
@@ -1656,9 +1615,7 @@ public:
   buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold)
   {
     ut_ad(id.fold() == fold);
-    ut_ad(mutex_own(&mutex) ||
-          rw_lock_own_flagged(hash_lock_get_low(fold),
-                              RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+    ut_ad(mutex_own(&mutex) || page_hash.lock_get(fold)->is_locked());
     buf_page_t *bpage;
     /* Look for the page in the hash table */
     HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage,
@@ -1676,17 +1633,14 @@ private:
   @retval nullptr  if no block was found; !lock || !*lock will also hold */
   template<bool exclusive,bool watch>
   buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
-                                   rw_lock_t **hash_lock)
+                                   page_hash_latch **hash_lock)
   {
     ut_ad(hash_lock || !exclusive);
-    rw_lock_t *latch= page_hash_lock<exclusive>(fold);
+    page_hash_latch *latch= page_hash.lock<exclusive>(fold);
     buf_page_t *bpage= page_hash_get_low(page_id, fold);
     if (!bpage || watch_is_sentinel(*bpage))
     {
-      if (exclusive)
-        rw_lock_x_unlock(latch);
-      else
-        rw_lock_s_unlock(latch);
+      latch->release<exclusive>();
      if (hash_lock)
        *hash_lock= nullptr;
      return watch ? bpage : nullptr;
@@ -1697,10 +1651,8 @@ private:
     if (hash_lock)
       *hash_lock= latch; /* to be released by the caller */
-    else if (exclusive)
-      rw_lock_x_unlock(latch);
     else
-      rw_lock_s_unlock(latch);
+      latch->release<exclusive>();
     return bpage;
   }
 
 public:
@@ -1713,7 +1665,7 @@ public:
   @retval nullptr  if no block was found; !lock || !*lock will also hold */
   template<bool exclusive>
   buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
-                                   rw_lock_t **hash_lock)
+                                   page_hash_latch **hash_lock)
   { return page_hash_get_locked<exclusive,false>(page_id, fold, hash_lock); }
 
   /** @return whether the buffer pool contains a page
@@ -1730,9 +1682,7 @@ public:
   @return whether bpage a sentinel for a buffer pool watch */
   bool watch_is_sentinel(const buf_page_t &bpage)
   {
-    ut_ad(mutex_own(&mutex) ||
-          rw_lock_own_flagged(hash_lock_get(bpage.id()),
-                              RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+    ut_ad(mutex_own(&mutex) || hash_lock_get(bpage.id())->is_locked());
     ut_ad(bpage.in_file());
 
     if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)])
@@ -1754,11 +1704,11 @@ public:
   bool watch_occurred(const page_id_t id)
   {
     const ulint fold= id.fold();
-    rw_lock_t *hash_lock= page_hash_lock<false>(fold);
+    page_hash_latch *hash_lock= page_hash.lock<false>(fold);
     /* The page must exist because watch_set() increments buf_fix_count. */
     buf_page_t *bpage= page_hash_get_low(id, fold);
     const bool is_sentinel= watch_is_sentinel(*bpage);
-    rw_lock_s_unlock(hash_lock);
+    hash_lock->read_unlock();
     return !is_sentinel;
   }
 
@@ -1769,7 +1719,8 @@ public:
   @param hash_lock  exclusively held page_hash latch
   @return a buffer pool block corresponding to id
   @retval nullptr  if the block was not present, and a watch was installed */
-  inline buf_page_t *watch_set(const page_id_t id, rw_lock_t **hash_lock);
+  inline buf_page_t *watch_set(const page_id_t id,
+                               page_hash_latch **hash_lock);
 
   /** Stop watching whether a page has been read in.
   watch_set(id) must have returned nullptr before.
@@ -1777,7 +1728,7 @@ public:
   void watch_unset(const page_id_t id)
   {
     const ulint fold= id.fold();
-    rw_lock_t *hash_lock= page_hash_lock<true>(fold);
+    page_hash_latch *hash_lock= page_hash.lock<true>(fold);
     /* The page must exist because watch_set() increments buf_fix_count. */
     buf_page_t *watch= page_hash_get_low(id, fold);
     if (watch->unfix() == 0 && watch_is_sentinel(*watch))
@@ -1786,7 +1737,7 @@ public:
       ut_ad(watch->in_page_hash);
       ut_d(watch->in_page_hash= false);
       HASH_DELETE(buf_page_t, hash, &page_hash, fold, watch);
-      rw_lock_x_unlock(hash_lock);
+      hash_lock->write_unlock();
       // Now that the watch is detached from page_hash, release it to watch[].
       mutex_enter(&mutex);
       /* It is possible that watch_remove() already removed the watch. */
@@ -1799,7 +1750,7 @@ public:
       mutex_exit(&mutex);
     }
     else
-      rw_lock_x_unlock(hash_lock);
+      hash_lock->write_unlock();
   }
 
   /** Remove the sentinel block for the watch before replacing it with a
@@ -1872,11 +1823,92 @@ public:
   /** read-ahead request size in pages */
   Atomic_counter<uint32_t> read_ahead_area;
 
+  /** Hash table with singly-linked overflow lists. @see hash_table_t */
+  struct page_hash_table
+  {
+    /** Number of array[] elements per page_hash_latch.
+    Must be one less than a power of 2. */
+    static constexpr size_t ELEMENTS_PER_LATCH= 1023;
+
+    /** number of payload elements in array[] */
+    Atomic_relaxed<ulint> n_cells;
+    /** the hash array, with pad(n_cells) elements */
+    hash_cell_t *array;
+
+    /** Create the hash table.
+    @param n  the lower bound of n_cells */
+    void create(ulint n);
+
+    /** Free the hash table. */
+    void free() { ut_free(array); array= nullptr; }
+
+    /** @return the index of an array element */
+    ulint calc_hash(ulint fold) const { return calc_hash(fold, n_cells); }
+    /** @return raw array index converted to padded index */
+    static ulint pad(ulint h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }
+  private:
+    /** @return the hash value before any ELEMENTS_PER_LATCH padding */
+    static ulint hash(ulint fold, ulint n) { return ut_hash_ulint(fold, n); }
+
+    /** @return the index of an array element */
+    static ulint calc_hash(ulint fold, ulint n_cells)
+    {
+      return pad(hash(fold, n_cells));
+    }
+    /** Get a page_hash latch. */
+    page_hash_latch *lock_get(ulint fold, ulint n) const
+    {
+      static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
+                    "must be one less than a power of 2");
+      return reinterpret_cast<page_hash_latch*>
+        (&array[calc_hash(fold, n) & ~ELEMENTS_PER_LATCH]);
+    }
+  public:
+    /** Get a page_hash latch. */
+    page_hash_latch *lock_get(ulint fold) const
+    { return lock_get(fold, n_cells); }
+
+    /** Acquire an array latch, tolerating concurrent buf_pool_t::resize()
+    @tparam exclusive  whether the latch is to be acquired exclusively
+    @param fold  hash bucket key */
+    template<bool exclusive> page_hash_latch *lock(ulint fold)
+    {
+      for (;;)
+      {
+        auto n= n_cells;
+        page_hash_latch *latch= lock_get(fold, n);
+        latch->acquire<exclusive>();
+        /* Our latch prevents n_cells from changing. */
+        if (UNIV_LIKELY(n == n_cells))
+          return latch;
+        /* Retry, because buf_pool_t::resize_hash() affected us. */
+        latch->release<exclusive>();
+      }
+    }
+
+    /** Exclusively acquire all latches */
+    inline void write_lock_all();
+
+    /** Release all latches */
+    inline void write_unlock_all();
+  };
+
+private:
+  /** Former page_hash that has been deleted during resize();
+  singly-linked list via freed_page_hash->array[1] */
+  page_hash_table *freed_page_hash;
+
+  /** Lock all page_hash, also freed_page_hash. */
+  inline void write_lock_all_page_hash();
+  /** Release all page_hash, also freed_page_hash. */
+  inline void write_unlock_all_page_hash();
+  /** Resize page_hash and zip_hash. */
+  inline void resize_hash();
+
+public:
   /** Hash table of file pages (buf_page_t::in_file() holds),
-  indexed by page_id_t. Protected by both mutex and page_hash_latches[]. */
-  hash_table_t page_hash;
-  /** Latches protecting page_hash */
-  mutable rw_lock_t page_hash_latches[MAX_PAGE_HASH_LOCKS];
+  indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
+  page_hash_table page_hash;
 
   /** map of block->frame to buf_block_t blocks that belong to
   buf_buddy_alloc(); protected by buf_pool.mutex */
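To make the padded layout above concrete: with ELEMENTS_PER_LATCH = 1023, pad() maps a raw hash value h to array slot 1 + h/1023 + h, so slots 0, 1024, 2048, ... are never used for hash cells; each of those slots instead holds the page_hash_latch that guards the 1023 cells following it, and lock_get() recovers that latch by masking the padded index with ~ELEMENTS_PER_LATCH. Compared with the removed page_hash_latches[] array, every latch now lives inside the cell array itself, next to the buckets it protects. The following standalone sketch (not part of the patch) reproduces the arithmetic with plain size_t values, approximating ut_hash_ulint() by a simple modulus, and checks those invariants:

/* Standalone sketch; names mirror buf_pool_t::page_hash_table, but
InnoDB types are replaced by size_t and ut_hash_ulint() by a modulus. */
#include <cassert>
#include <cstddef>

static constexpr size_t ELEMENTS_PER_LATCH= 1023;

/* like page_hash_table::hash(): map a fold value to a raw cell index */
static size_t hash(size_t fold, size_t n_cells) { return fold % n_cells; }

/* like page_hash_table::pad(): skip one latch slot per 1024-slot group */
static size_t pad(size_t h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }

/* like page_hash_table::calc_hash(): padded index of the cell for a fold */
static size_t calc_hash(size_t fold, size_t n_cells)
{ return pad(hash(fold, n_cells)); }

/* like page_hash_table::lock_get(): index of the latch guarding that cell */
static size_t latch_slot(size_t fold, size_t n_cells)
{ return calc_hash(fold, n_cells) & ~ELEMENTS_PER_LATCH; }

int main()
{
  const size_t n_cells= 5000;                  /* arbitrary example size */
  for (size_t fold= 0; fold < 100000; fold++)
  {
    const size_t cell= calc_hash(fold, n_cells);
    const size_t latch= latch_slot(fold, n_cells);
    /* hash cells never land on a latch slot */
    assert(cell % (ELEMENTS_PER_LATCH + 1));
    /* latches sit exactly at the start of each 1024-slot group */
    assert(!(latch % (ELEMENTS_PER_LATCH + 1)));
    /* the latch guarding a cell precedes it within the same group */
    assert(latch < cell && cell - latch <= ELEMENTS_PER_LATCH);
  }
  return 0;
}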
@@ -2103,6 +2135,19 @@ private:
 /** The InnoDB buffer pool */
 extern buf_pool_t buf_pool;
 
+inline void page_hash_latch::read_lock()
+{
+  ut_ad(!mutex_own(&buf_pool.mutex));
+  if (!read_trylock())
+    read_lock_wait();
+}
+
+inline void page_hash_latch::write_lock()
+{
+  if (!write_trylock())
+    write_lock_wait();
+}
+
 inline void buf_page_t::add_buf_fix_count(uint32_t count)
 {
   ut_ad(mutex_own(&buf_pool.mutex));
@@ -2129,15 +2174,15 @@ inline void buf_page_t::set_state(buf_page_state state)
     if (!in_file()) break;
     /* fall through */
   case BUF_BLOCK_FILE_PAGE:
-    ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
+    ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
     break;
   case BUF_BLOCK_NOT_USED:
     if (!in_file()) break;
     /* fall through */
   case BUF_BLOCK_ZIP_PAGE:
-    ut_ad((this >= &buf_pool.watch[0] &&
-           this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) ||
-          rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
+    ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked() ||
+          (this >= &buf_pool.watch[0] &&
+           this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]));
     break;
   }
 #endif
@@ -2159,7 +2204,7 @@ inline void buf_page_t::set_corrupt_id()
     break;
   case BUF_BLOCK_ZIP_PAGE:
   case BUF_BLOCK_FILE_PAGE:
-    ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
+    ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
     break;
   case BUF_BLOCK_NOT_USED:
   case BUF_BLOCK_MEMORY:
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index ed3a6cabdb3..937cb427a47 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -153,7 +153,7 @@ buf_LRU_stat_update();
 @param id         page identifier
 @param hash_lock  buf_pool.page_hash latch (will be released here) */
 void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id,
-                           rw_lock_t *hash_lock)
+                           page_hash_latch *hash_lock)
   MY_ATTRIBUTE((nonnull));
 
 #ifdef UNIV_DEBUG
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 1802bd57ddd..55bd2ac3a5a 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -192,10 +192,43 @@ extern const byte field_ref_zero[UNIV_PAGE_SIZE_MAX];
 
 #include "ut0mutex.h"
 #include "sync0rw.h"
+#include "rw_lock.h"
 
 typedef ib_mutex_t BufPoolMutex;
 typedef ib_mutex_t FlushListMutex;
 typedef rw_lock_t BPageLock;
+
+class page_hash_latch : public rw_lock
+{
+public:
+  /** Wait for a shared lock */
+  void read_lock_wait();
+  /** Wait for an exclusive lock */
+  void write_lock_wait();
+
+  /** Acquire a shared lock */
+  inline void read_lock();
+  /** Acquire an exclusive lock */
+  inline void write_lock();
+
+  /** Acquire a lock */
+  template<bool exclusive> void acquire()
+  {
+    if (exclusive)
+      write_lock();
+    else
+      read_lock();
+  }
+  /** Release a lock */
+  template<bool exclusive> void release()
+  {
+    if (exclusive)
+      write_unlock();
+    else
+      read_unlock();
+  }
+};
+
 #endif /* !UNIV_INNOCHECKSUM */
 
 #endif /* buf0types.h */
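A behavioural note on the debug assertions rewritten in the buf0buf.h hunks above: rw_lock_own() and rw_lock_own_flagged() verified that the calling thread holds the latch, whereas page_hash_latch inherits its predicates from the new rw_lock class (added in rw_lock.h below), whose lock word carries no owner information; is_locked() and is_write_locked() therefore only assert that some thread holds the latch. A small sketch of the difference (not part of the patch; it assumes an in-tree build so that rw_lock.h and my_dbug.h resolve):

/* Sketch only; demonstrates that the new predicates are owner-less. */
#include "rw_lock.h"
#include <cassert>
#include <cstdlib>
#include <thread>

int main()
{
  rw_lock latch;
  /* take the exclusive lock in a different thread */
  std::thread([&] { if (!latch.write_trylock()) std::abort(); }).join();
  /* the predicate holds even though *this* thread never acquired anything:
  the lock word records only that a writer exists, not which thread it is,
  so the rewritten ut_ad() checks are weaker than rw_lock_own() was */
  assert(latch.is_write_locked());
  /* for the same reason, releasing from yet another thread is possible */
  std::thread([&] { latch.write_unlock(); }).join();
  assert(!latch.is_locked());
  return 0;
}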
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
index 58da36fee5e..981ff5a0814 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innobase/include/hash0hash.h
@@ -33,8 +33,6 @@ struct hash_cell_t{
 };
 
 typedef void*	hash_node_t;
 
-#define hash_calc_hash(FOLD, TABLE) (TABLE)->calc_hash(FOLD)
-
 /*******************************************************************//**
 Inserts a struct to a hash table. */
@@ -145,7 +143,7 @@ Gets the next struct in a hash chain, NULL if none. */
 Looks for a struct in a hash table. */
 #define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
 {\
-	(DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+	(DATA) = (TYPE) HASH_GET_FIRST(TABLE, (TABLE)->calc_hash(FOLD)); \
 	HASH_ASSERT_VALID(DATA);\
 \
 	while ((DATA) != NULL) {\
diff --git a/storage/innobase/include/rw_lock.h b/storage/innobase/include/rw_lock.h
new file mode 100644
index 00000000000..613adfef3f5
--- /dev/null
+++ b/storage/innobase/include/rw_lock.h
@@ -0,0 +1,106 @@
+/*****************************************************************************
+
+Copyright (c) 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+#pragma once
+#include <atomic>
+#include "my_dbug.h"
+
+/** Simple read-write lock based on std::atomic */
+class rw_lock
+{
+  /** The lock word */
+  std::atomic<uint32_t> lock;
+
+protected:
+  /** Available lock */
+  static constexpr uint32_t UNLOCKED= 0;
+  /** Flag to indicate that write_lock() is being held */
+  static constexpr uint32_t WRITER= 1 << 31;
+  /** Flag to indicate that write_lock_wait() is pending */
+  static constexpr uint32_t WRITER_WAITING= 1 << 30;
+  /** Flag to indicate that write_lock() or write_lock_wait() is pending */
+  static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING;
+
+  /** Yield a read lock request due to a conflict with a write lock.
+  @return the lock value */
+  uint32_t read_lock_yield()
+  {
+    uint32_t l= lock.fetch_sub(1, std::memory_order_relaxed);
+    DBUG_ASSERT(l & ~WRITER_PENDING);
+    return l;
+  }
+  /** Start waiting for an exclusive lock. */
+  void write_lock_wait_start()
+  { lock.fetch_or(WRITER_WAITING, std::memory_order_relaxed); }
+  /** Wait for an exclusive lock.
+  @return whether the exclusive lock was acquired */
+  bool write_lock_poll()
+  {
+    auto l= WRITER_WAITING;
+    if (lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire,
+                                     std::memory_order_relaxed))
+      return true;
+    if (!(l & WRITER_WAITING))
+      /* write_lock() must have succeeded for another thread */
+      write_lock_wait_start();
+    return false;
+  }
+
+public:
+  /** Default constructor */
+  rw_lock() : lock(UNLOCKED) {}
+
+  /** Release a shared lock */
+  void read_unlock()
+  {
+    IF_DBUG_ASSERT(auto l=,) lock.fetch_sub(1, std::memory_order_release);
+    DBUG_ASSERT(l & ~WRITER_PENDING); /* at least one read lock */
+    DBUG_ASSERT(!(l & WRITER)); /* no write lock must have existed */
+  }
+  /** Release an exclusive lock */
+  void write_unlock()
+  {
+    IF_DBUG_ASSERT(auto l=,) lock.fetch_sub(WRITER, std::memory_order_release);
+    DBUG_ASSERT(l & WRITER); /* the write lock must have existed */
+  }
+  /** Try to acquire a shared lock.
+  @return whether the lock was acquired */
+  bool read_trylock()
+  { return !(lock.fetch_add(1, std::memory_order_acquire) & WRITER_PENDING); }
+  /** Try to acquire an exclusive lock.
+  @return whether the lock was acquired */
+  bool write_trylock()
+  {
+    auto l= UNLOCKED;
+    return lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire,
+                                        std::memory_order_relaxed);
+  }
+
+  /** @return whether an exclusive lock is being held by any thread */
+  bool is_write_locked() const
+  { return !!(lock.load(std::memory_order_relaxed) & WRITER); }
+  /** @return whether a shared lock is being held by any thread */
+  bool is_read_locked() const
+  {
+    auto l= lock.load(std::memory_order_relaxed);
+    return (l & ~WRITER_PENDING) && !(l & WRITER);
+  }
+  /** @return whether any lock is being held by any thread */
+  bool is_locked() const
+  { return (lock.load(std::memory_order_relaxed) & ~WRITER_WAITING) != 0; }
+};
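rw_lock itself only exposes non-blocking acquisition (read_trylock(), write_trylock()) plus the protected helpers read_lock_yield(), write_lock_wait_start() and write_lock_poll(); blocking acquisition is left to derived classes such as page_hash_latch, whose read_lock_wait() and write_lock_wait() are declared in buf0types.h but defined elsewhere in the patch. A minimal sketch (not part of the patch) of how a derived class can compose those primitives; the real wait loops may spin or back off differently, and std::this_thread::yield() merely stands in for that policy:

/* Sketch only; assumes an in-tree build where rw_lock.h resolves. */
#include "rw_lock.h"
#include <thread>

class spin_rw_lock : public rw_lock
{
public:
  void read_lock()
  {
    while (!read_trylock())
    {
      /* read_trylock() already incremented the lock word even though it
      failed; undo that before waiting, so that a pending writer can
      observe a zero reader count */
      read_lock_yield();
      std::this_thread::yield();
    }
  }
  void write_lock()
  {
    if (write_trylock())
      return;
    /* advertise the waiting writer; new readers will now back off */
    write_lock_wait_start();
    /* write_lock_poll() succeeds once only WRITER_WAITING remains in the
    lock word and it can be atomically replaced by WRITER */
    while (!write_lock_poll())
      std::this_thread::yield();
  }
};

int main()
{
  spin_rw_lock latch;
  latch.read_lock();
  latch.read_unlock();
  latch.write_lock();
  latch.write_unlock();
  return 0;
}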
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 2aa874edfad..d4b6425c44b 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -333,8 +333,6 @@ extern const ulint	srv_buf_pool_min_size;
 extern const ulint	srv_buf_pool_def_size;
 /** Requested buffer pool chunk size */
 extern ulong	srv_buf_pool_chunk_unit;
-/** Number of locks to protect buf_pool.page_hash */
-extern ulong	srv_n_page_hash_locks;
 /** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
 extern ulong	srv_LRU_scan_depth;
 /** Whether or not to flush neighbors of a block */
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 7fcac01e5ba..169cbdd9aa5 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -226,22 +226,8 @@ rw_lock_lock_word_decr(
 		caused by concurrent executions of rw_lock_s_lock(). */
 
-#if 1 /* FIXME: MDEV-22871 Spurious contention between rw_lock_s_lock() */
-
-		/* When the number of concurrently executing threads
-		exceeds the number of available processor cores,
-		multiple buf_pool.page_hash S-latch requests would
-		conflict here, mostly in buf_page_get_low(). We should
-		implement a simpler rw-lock where the S-latch
-		acquisition would be a simple fetch_add(1) followed by
-		either an optional load() loop to wait for the X-latch
-		to be released, or a fetch_sub(1) and a retry.
-
-		For now, we work around the problem with a delay in
-		this loop. It helped a little on some systems and was
-		reducing performance on others. */
-		(void) LF_BACKOFF();
-#endif
+		/* Note: unlike this implementation, rw_lock::read_lock()
+		allows concurrent calls without a spin loop */
 	}
 
 	/* A real conflict was detected. */
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 7eb8250b63d..c63fedb43ee 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -126,7 +126,6 @@ extern mysql_pfs_key_t	index_tree_rw_lock_key;
 extern mysql_pfs_key_t	index_online_log_key;
 extern mysql_pfs_key_t	dict_table_stats_key;
 extern mysql_pfs_key_t	trx_sys_rw_lock_key;
-extern mysql_pfs_key_t	hash_table_locks_key;
 #endif /* UNIV_PFS_RWLOCK */
 
 /** Prints info of the sync system.
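The replacement comment in sync0rw.ic above refers to the shared-lock fast path: the old rw_lock_t acquires an S latch with a compare-and-swap loop on its lock word, so concurrent readers can make each other's CAS fail and retry (the removed FIXME), whereas rw_lock::read_trylock() is a single fetch_add that readers execute concurrently without invalidating one another. A standalone sketch of the contrast (not part of the patch; both variants use the new rw_lock flag layout rather than the old lock_word encoding, purely for brevity):

/* Sketch contrasting the two shared-lock fast paths. */
#include <atomic>
#include <cstdint>

static constexpr uint32_t WRITER= 1U << 31;
static constexpr uint32_t WRITER_WAITING= 1U << 30;
static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING;

/* old style: CAS loop; a concurrent reader changing the word makes the
CAS fail and forces another iteration even though both could be admitted */
static bool s_lock_cas(std::atomic<uint32_t> &word)
{
  uint32_t l= word.load(std::memory_order_relaxed);
  while (!(l & WRITER_PENDING))
    if (word.compare_exchange_weak(l, l + 1, std::memory_order_acquire,
                                   std::memory_order_relaxed))
      return true;
  return false; /* a writer is active or waiting */
}

/* new style (rw_lock::read_trylock()): one unconditional fetch_add; readers
never invalidate each other, so there is nothing to retry among readers.
On failure the increment remains and must be undone by the caller
(cf. rw_lock::read_lock_yield()) before it waits. */
static bool s_lock_fetch_add(std::atomic<uint32_t> &word)
{
  return !(word.fetch_add(1, std::memory_order_acquire) & WRITER_PENDING);
}

int main()
{
  std::atomic<uint32_t> w{0};
  if (s_lock_cas(w)) w.fetch_sub(1);
  if (s_lock_fetch_add(w)) w.fetch_sub(1);
  return !!w.load();
}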