diff options
Diffstat (limited to 'storage/innobase/include')
-rw-r--r-- | storage/innobase/include/btr0btr.h | 2 | ||||
-rw-r--r-- | storage/innobase/include/btr0types.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/buf0buf.h | 205 | ||||
-rw-r--r-- | storage/innobase/include/buf0dblwr.h | 69 | ||||
-rw-r--r-- | storage/innobase/include/buf0flu.h | 22 | ||||
-rw-r--r-- | storage/innobase/include/buf0rea.h | 9 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 57 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 17 | ||||
-rw-r--r-- | storage/innobase/include/trx0sys.h | 11 | ||||
-rw-r--r-- | storage/innobase/include/trx0trx.h | 4 |
11 files changed, 239 insertions, 163 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index a1cc10b05db..a56598d3620 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -445,7 +445,7 @@ Gets the root node of a tree and x- or s-latches it. buf_block_t* btr_root_block_get( /*===============*/ - const dict_index_t* index, /*!< in: index tree */ + dict_index_t* index, /*!< in: index tree */ rw_lock_type_t mode, /*!< in: either RW_S_LATCH or RW_X_LATCH */ mtr_t* mtr, /*!< in: mtr */ diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 912c022c64f..fc829e7857a 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -103,6 +103,9 @@ enum btr_latch_mode { dict_index_t::lock is being held in non-exclusive mode. */ BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED, + /** Attempt to modify records in an x-latched tree. */ + BTR_MODIFY_TREE_ALREADY_LATCHED = BTR_MODIFY_TREE + | BTR_ALREADY_S_LATCHED, /** U-latch root and X-latch a leaf page, assuming that dict_index_t::lock is being held in U mode. */ BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index c02180ad9ee..74905638698 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -720,13 +720,14 @@ public: ut_ad(s < REINIT); } - void read_unfix(uint32_t s) + uint32_t read_unfix(uint32_t s) { ut_ad(lock.is_write_locked()); ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1); - ut_d(auto old_state=) zip.fix.fetch_add(s - READ_FIX); + uint32_t old_state= zip.fix.fetch_add(s - READ_FIX); ut_ad(old_state >= READ_FIX); ut_ad(old_state < WRITE_FIX); + return old_state + (s - READ_FIX); } void set_freed(uint32_t prev_state, uint32_t count= 0) @@ -777,11 +778,11 @@ public: it from buf_pool.flush_list */ inline void write_complete(bool temporary); - /** Write a flushable page to a file. buf_pool.mutex must be held. - @param lru true=buf_pool.LRU; false=buf_pool.flush_list + /** Write a flushable page to a file or free a freeable block. + @param evict whether to evict the page on write completion @param space tablespace - @return whether the page was flushed and buf_pool.mutex was released */ - inline bool flush(bool lru, fil_space_t *space); + @return whether a page write was initiated and buf_pool.mutex released */ + bool flush(bool evict, fil_space_t *space); /** Notify that a page in a temporary tablespace has been modified. */ void set_temp_modified() @@ -851,8 +852,6 @@ public: /** @return whether the block is mapped to a data file */ bool in_file() const { return state() >= FREED; } - /** @return whether the block is modified and ready for flushing */ - inline bool ready_for_flush() const; /** @return whether the block can be relocated in memory. The block can be dirty, but it must not be I/O-fixed or bufferfixed. */ inline bool can_relocate() const; @@ -1025,10 +1024,10 @@ Compute the hash fold value for blocks in buf_pool.zip_hash. */ #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ -/** A "Hazard Pointer" class used to iterate over page lists -inside the buffer pool. A hazard pointer is a buf_page_t pointer +/** A "Hazard Pointer" class used to iterate over buf_pool.LRU or +buf_pool.flush_list. A hazard pointer is a buf_page_t pointer which we intend to iterate over next and we want it remain valid -even after we release the buffer pool mutex. */ +even after we release the mutex that protects the list. */ class HazardPointer { public: @@ -1143,7 +1142,8 @@ struct buf_buddy_free_t { /*!< Node of zip_free list */ }; -/** @brief The buffer pool statistics structure. */ +/** @brief The buffer pool statistics structure; +protected by buf_pool.mutex unless otherwise noted. */ struct buf_pool_stat_t{ /** Initialize the counters */ void init() { memset((void*) this, 0, sizeof *this); } @@ -1152,9 +1152,8 @@ struct buf_pool_stat_t{ /*!< number of page gets performed; also successful searches through the adaptive hash index are - counted as page gets; this field - is NOT protected by the buffer - pool mutex */ + counted as page gets; + NOT protected by buf_pool.mutex */ ulint n_pages_read; /*!< number read operations */ ulint n_pages_written;/*!< number write operations */ ulint n_pages_created;/*!< number of pages created @@ -1172,10 +1171,9 @@ struct buf_pool_stat_t{ young because the first access was not long enough ago, in buf_page_peek_if_too_old() */ - /** number of waits for eviction; writes protected by buf_pool.mutex */ + /** number of waits for eviction */ ulint LRU_waits; ulint LRU_bytes; /*!< LRU size in bytes */ - ulint flush_list_bytes;/*!< flush_list size in bytes */ }; /** Statistics of buddy blocks of a given size. */ @@ -1496,6 +1494,11 @@ public: n_chunks_new / 4 * chunks->size; } + /** @return whether the buffer pool has run out */ + TPOOL_SUPPRESS_TSAN + bool ran_out() const + { return UNIV_UNLIKELY(!try_LRU_scan || !UT_LIST_GET_LEN(free)); } + /** @return whether the buffer pool is shrinking */ inline bool is_shrinking() const { @@ -1533,17 +1536,10 @@ public: /** Buffer pool mutex */ alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex; - /** Number of pending LRU flush; protected by mutex. */ - ulint n_flush_LRU_; - /** broadcast when n_flush_LRU reaches 0; protected by mutex */ - pthread_cond_t done_flush_LRU; - /** Number of pending flush_list flush; protected by mutex */ - ulint n_flush_list_; - /** broadcast when n_flush_list reaches 0; protected by mutex */ - pthread_cond_t done_flush_list; - - TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; } - TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; } + /** current statistics; protected by mutex */ + buf_pool_stat_t stat; + /** old statistics; protected by mutex */ + buf_pool_stat_t old_stat; /** @name General fields */ /* @{ */ @@ -1704,11 +1700,12 @@ public: buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1]; /*!< Statistics of buddy system, indexed by block size */ - buf_pool_stat_t stat; /*!< current statistics */ - buf_pool_stat_t old_stat; /*!< old statistics */ /* @} */ + /** number of index page splits */ + Atomic_counter<ulint> pages_split; + /** @name Page flushing algorithm fields */ /* @{ */ @@ -1717,7 +1714,10 @@ public: alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex; /** "hazard pointer" for flush_list scans; protected by flush_list_mutex */ FlushHp flush_hp; - /** modified blocks (a subset of LRU) */ + /** flush_list size in bytes; protected by flush_list_mutex */ + ulint flush_list_bytes; + /** possibly modified persistent pages (a subset of LRU); + buf_dblwr.pending_writes() is approximately COUNT(is_write_fixed()) */ UT_LIST_BASE_NODE_T(buf_page_t) flush_list; /** number of blocks ever added to flush_list; sometimes protected by flush_list_mutex */ @@ -1726,28 +1726,70 @@ public: TPOOL_SUPPRESS_TSAN void add_flush_list_requests(size_t size) { ut_ad(size); flush_list_requests+= size; } private: - /** whether the page cleaner needs wakeup from indefinite sleep */ - bool page_cleaner_is_idle; + static constexpr unsigned PAGE_CLEANER_IDLE= 1; + static constexpr unsigned FLUSH_LIST_ACTIVE= 2; + static constexpr unsigned LRU_FLUSH= 4; + + /** Number of pending LRU flush * LRU_FLUSH + + PAGE_CLEANER_IDLE + FLUSH_LIST_ACTIVE flags */ + unsigned page_cleaner_status; /** track server activity count for signaling idle flushing */ ulint last_activity_count; public: /** signalled to wake up the page_cleaner; protected by flush_list_mutex */ pthread_cond_t do_flush_list; + /** broadcast when !n_flush(); protected by flush_list_mutex */ + pthread_cond_t done_flush_LRU; + /** broadcast when a batch completes; protected by flush_list_mutex */ + pthread_cond_t done_flush_list; + + /** @return number of pending LRU flush */ + unsigned n_flush() const + { + mysql_mutex_assert_owner(&flush_list_mutex); + return page_cleaner_status / LRU_FLUSH; + } + + /** Increment the number of pending LRU flush */ + inline void n_flush_inc(); + + /** Decrement the number of pending LRU flush */ + inline void n_flush_dec(); + + /** @return whether flush_list flushing is active */ + bool flush_list_active() const + { + mysql_mutex_assert_owner(&flush_list_mutex); + return page_cleaner_status & FLUSH_LIST_ACTIVE; + } + + void flush_list_set_active() + { + ut_ad(!flush_list_active()); + page_cleaner_status+= FLUSH_LIST_ACTIVE; + } + void flush_list_set_inactive() + { + ut_ad(flush_list_active()); + page_cleaner_status-= FLUSH_LIST_ACTIVE; + } /** @return whether the page cleaner must sleep due to being idle */ bool page_cleaner_idle() const noexcept { mysql_mutex_assert_owner(&flush_list_mutex); - return page_cleaner_is_idle; + return page_cleaner_status & PAGE_CLEANER_IDLE; } - /** Wake up the page cleaner if needed */ - void page_cleaner_wakeup(); + /** Wake up the page cleaner if needed. + @param for_LRU whether to wake up for LRU eviction */ + void page_cleaner_wakeup(bool for_LRU= false); /** Register whether an explicit wakeup of the page cleaner is needed */ void page_cleaner_set_idle(bool deep_sleep) { mysql_mutex_assert_owner(&flush_list_mutex); - page_cleaner_is_idle= deep_sleep; + page_cleaner_status= (page_cleaner_status & ~PAGE_CLEANER_IDLE) | + (PAGE_CLEANER_IDLE * deep_sleep); } /** Update server last activity count */ @@ -1757,9 +1799,6 @@ public: last_activity_count= activity_count; } - // n_flush_LRU() + n_flush_list() - // is approximately COUNT(is_write_fixed()) in flush_list - unsigned freed_page_clock;/*!< a sequence number used to count the number of buffer blocks removed from the end of @@ -1769,16 +1808,10 @@ public: to read this for heuristic purposes without holding any mutex or latch */ - bool try_LRU_scan; /*!< Cleared when an LRU - scan for free block fails. This - flag is used to avoid repeated - scans of LRU list when we know - that there is no free block - available in the scan depth for - eviction. Set whenever - we flush a batch from the - buffer pool. Protected by the - buf_pool.mutex */ + /** Cleared when buf_LRU_get_free_block() fails. + Set whenever the free list grows, along with a broadcast of done_free. + Protected by buf_pool.mutex. */ + Atomic_relaxed<bool> try_LRU_scan; /* @} */ /** @name LRU replacement algorithm fields */ @@ -1787,7 +1820,8 @@ public: UT_LIST_BASE_NODE_T(buf_page_t) free; /*!< base node of the free block list */ - /** signaled each time when the free list grows; protected by mutex */ + /** broadcast each time when the free list grows or try_LRU_scan is set; + protected by mutex */ pthread_cond_t done_free; UT_LIST_BASE_NODE_T(buf_page_t) withdraw; @@ -1847,29 +1881,16 @@ public: { if (n_pend_reads) return true; - mysql_mutex_lock(&mutex); - const bool any_pending{n_flush_LRU_ || n_flush_list_}; - mysql_mutex_unlock(&mutex); + mysql_mutex_lock(&flush_list_mutex); + const bool any_pending= page_cleaner_status > PAGE_CLEANER_IDLE || + buf_dblwr.pending_writes(); + mysql_mutex_unlock(&flush_list_mutex); return any_pending; } - /** @return total amount of pending I/O */ - ulint io_pending() const - { - return n_pend_reads + n_flush_LRU() + n_flush_list(); - } -private: - /** Remove a block from the flush list. */ - inline void delete_from_flush_list_low(buf_page_t *bpage) noexcept; - /** Remove a block from flush_list. - @param bpage buffer pool page - @param clear whether to invoke buf_page_t::clear_oldest_modification() */ - void delete_from_flush_list(buf_page_t *bpage, bool clear) noexcept; -public: /** Remove a block from flush_list. @param bpage buffer pool page */ - void delete_from_flush_list(buf_page_t *bpage) noexcept - { delete_from_flush_list(bpage, true); } + void delete_from_flush_list(buf_page_t *bpage) noexcept; /** Prepare to insert a modified blcok into flush_list. @param lsn start LSN of the mini-transaction @@ -1884,7 +1905,7 @@ public: lsn_t lsn) noexcept; /** Free a page whose underlying file page has been freed. */ - inline void release_freed_page(buf_page_t *bpage) noexcept; + ATTRIBUTE_COLD void release_freed_page(buf_page_t *bpage) noexcept; private: /** Temporary memory for page_compressed and encrypted I/O */ @@ -1895,34 +1916,12 @@ private: /** array of slots */ buf_tmp_buffer_t *slots; - void create(ulint n_slots) - { - this->n_slots= n_slots; - slots= static_cast<buf_tmp_buffer_t*> - (ut_malloc_nokey(n_slots * sizeof *slots)); - memset((void*) slots, 0, n_slots * sizeof *slots); - } + void create(ulint n_slots); - void close() - { - for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) - { - aligned_free(s->crypt_buf); - aligned_free(s->comp_buf); - } - ut_free(slots); - slots= nullptr; - n_slots= 0; - } + void close(); /** Reserve a buffer */ - buf_tmp_buffer_t *reserve() - { - for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++) - if (s->acquire()) - return s; - return nullptr; - } + buf_tmp_buffer_t *reserve(); } io_buf; /** whether resize() is in the critical path */ @@ -2011,7 +2010,10 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn) /** Clear oldest_modification after removing from buf_pool.flush_list */ inline void buf_page_t::clear_oldest_modification() { - mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); +#ifdef SAFE_MUTEX + if (oldest_modification() != 2) + mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); +#endif /* SAFE_MUTEX */ ut_d(const auto s= state()); ut_ad(s >= REMOVE_HASH); ut_ad(oldest_modification()); @@ -2023,17 +2025,6 @@ inline void buf_page_t::clear_oldest_modification() oldest_modification_.store(0, std::memory_order_release); } -/** @return whether the block is modified and ready for flushing */ -inline bool buf_page_t::ready_for_flush() const -{ - mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(in_LRU_list); - const auto s= state(); - ut_a(s >= FREED); - ut_ad(!fsp_is_system_temporary(id().space()) || oldest_modification() == 2); - return s < READ_FIX; -} - /** @return whether the block can be relocated in memory. The block can be dirty, but it must not be I/O-fixed or bufferfixed. */ inline bool buf_page_t::can_relocate() const diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h index fb9df55504c..d9c9239c0b4 100644 --- a/storage/innobase/include/buf0dblwr.h +++ b/storage/innobase/include/buf0dblwr.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -54,9 +54,9 @@ class buf_dblwr_t }; /** the page number of the first doublewrite block (block_size() pages) */ - page_id_t block1= page_id_t(0, 0); + page_id_t block1{0, 0}; /** the page number of the second doublewrite block (block_size() pages) */ - page_id_t block2= page_id_t(0, 0); + page_id_t block2{0, 0}; /** mutex protecting the data members below */ mysql_mutex_t mutex; @@ -72,11 +72,15 @@ class buf_dblwr_t ulint writes_completed; /** number of pages written by flush_buffered_writes_completed() */ ulint pages_written; + /** condition variable for !writes_pending */ + pthread_cond_t write_cond; + /** number of pending page writes */ + size_t writes_pending; slot slots[2]; - slot *active_slot= &slots[0]; + slot *active_slot; - /** Initialize the doublewrite buffer data structure. + /** Initialise the persistent storage of the doublewrite buffer. @param header doublewrite page header in the TRX_SYS page */ inline void init(const byte *header); @@ -84,6 +88,8 @@ class buf_dblwr_t bool flush_buffered_writes(const ulint size); public: + /** Initialise the doublewrite buffer data structures. */ + void init(); /** Create or restore the doublewrite buffer in the TRX_SYS page. @return whether the operation succeeded */ bool create(); @@ -118,7 +124,7 @@ public: void recover(); /** Update the doublewrite buffer on data page write completion. */ - void write_completed(); + void write_completed(bool with_doublewrite); /** Flush possible buffered writes to persistent storage. It is very important to call this function after a batch of writes has been posted, and also when we may have to wait for a page latch! @@ -137,14 +143,14 @@ public: @param size payload size in bytes */ void add_to_batch(const IORequest &request, size_t size); - /** Determine whether the doublewrite buffer is initialized */ - bool is_initialised() const + /** Determine whether the doublewrite buffer has been created */ + bool is_created() const { return UNIV_LIKELY(block1 != page_id_t(0, 0)); } /** @return whether a page identifier is part of the doublewrite buffer */ bool is_inside(const page_id_t id) const { - if (!is_initialised()) + if (!is_created()) return false; ut_ad(block1 < block2); if (id < block1) @@ -156,13 +162,44 @@ public: /** Wait for flush_buffered_writes() to be fully completed */ void wait_flush_buffered_writes() { - if (is_initialised()) - { - mysql_mutex_lock(&mutex); - while (batch_running) - my_cond_wait(&cond, &mutex.m_mutex); - mysql_mutex_unlock(&mutex); - } + mysql_mutex_lock(&mutex); + while (batch_running) + my_cond_wait(&cond, &mutex.m_mutex); + mysql_mutex_unlock(&mutex); + } + + /** Register an unbuffered page write */ + void add_unbuffered() + { + mysql_mutex_lock(&mutex); + writes_pending++; + mysql_mutex_unlock(&mutex); + } + + size_t pending_writes() + { + mysql_mutex_lock(&mutex); + const size_t pending{writes_pending}; + mysql_mutex_unlock(&mutex); + return pending; + } + + /** Wait for writes_pending to reach 0 */ + void wait_for_page_writes() + { + mysql_mutex_lock(&mutex); + while (writes_pending) + my_cond_wait(&write_cond, &mutex.m_mutex); + mysql_mutex_unlock(&mutex); + } + + /** Wait for writes_pending to reach 0 */ + void wait_for_page_writes(const timespec &abstime) + { + mysql_mutex_lock(&mutex); + while (writes_pending) + my_cond_timedwait(&write_cond, &mutex.m_mutex, &abstime); + mysql_mutex_unlock(&mutex); } }; diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index af38f61b13b..31fe4446681 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -30,10 +30,8 @@ Created 11/5/1995 Heikki Tuuri #include "log0log.h" #include "buf0buf.h" -/** Number of pages flushed. Protected by buf_pool.mutex. */ -extern ulint buf_flush_page_count; /** Number of pages flushed via LRU. Protected by buf_pool.mutex. -Also included in buf_flush_page_count. */ +Also included in buf_pool.stat.n_pages_written. */ extern ulint buf_lru_flush_page_count; /** Number of pages freed without flushing. Protected by buf_pool.mutex. */ extern ulint buf_lru_freed_page_count; @@ -86,15 +84,18 @@ buf_flush_init_for_writing( bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr) MY_ATTRIBUTE((warn_unused_result)); -/** Write out dirty blocks from buf_pool.LRU. +/** Write out dirty blocks from buf_pool.LRU, +and move clean blocks to buf_pool.free. +The caller must invoke buf_dblwr.flush_buffered_writes() +after releasing buf_pool.mutex. @param max_n wished maximum mumber of blocks flushed -@return the number of processed pages +@param evict whether to evict pages after flushing +@return evict ? number of processed pages : number of pages written @retval 0 if a buf_pool.LRU batch is already running */ -ulint buf_flush_LRU(ulint max_n); +ulint buf_flush_LRU(ulint max_n, bool evict); -/** Wait until a flush batch ends. -@param lru true=buf_pool.LRU; false=buf_pool.flush_list */ -void buf_flush_wait_batch_end(bool lru); +/** Wait until a LRU flush batch ends. */ +void buf_flush_wait_LRU_batch_end(); /** Wait until all persistent pages are flushed up to a limit. @param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn); @@ -106,9 +107,6 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious); /** Initialize page_cleaner. */ ATTRIBUTE_COLD void buf_flush_page_cleaner_init(); -/** Wait for pending flushes to complete. */ -void buf_flush_wait_batch_end_acquiring_mutex(bool lru); - /** Flush the buffer pool on shutdown. */ ATTRIBUTE_COLD void buf_flush_buffer_pool(); diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h index 986a792b97e..4ec8938c689 100644 --- a/storage/innobase/include/buf0rea.h +++ b/storage/innobase/include/buf0rea.h @@ -33,10 +33,11 @@ Created 11/5/1995 Heikki Tuuri buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock released by the i/o-handler thread. -@param[in] page_id page id -@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 -@retval DB_SUCCESS if the page was read and is not corrupted, -@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted, +@param page_id page id +@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 +@retval DB_SUCCESS if the page was read and is not corrupted +@retval DB_SUCCESS_LOCKED_REC if the page was not read +@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but after decryption normal page checksum does not match. @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 940e1b68458..483d594c6b9 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -898,11 +898,13 @@ public: @param purpose tablespace purpose @param crypt_data encryption information @param mode encryption mode + @param opened true if space files are opened @return pointer to created tablespace, to be filled in with add() @retval nullptr on failure (such as when the same tablespace exists) */ static fil_space_t *create(uint32_t id, uint32_t flags, fil_type_t purpose, fil_space_crypt_t *crypt_data, - fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT); + fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT, + bool opened= false); MY_ATTRIBUTE((warn_unused_result)) /** Acquire a tablespace reference. @@ -1107,7 +1109,7 @@ private: inline bool fil_space_t::use_doublewrite() const { return !UT_LIST_GET_FIRST(chain)->atomic_write && srv_use_doublewrite_buf && - buf_dblwr.is_initialised(); + buf_dblwr.is_created(); } inline void fil_space_t::set_imported() @@ -1384,6 +1386,11 @@ struct fil_system_t private: bool m_initialised; + + /** Points to the last opened space in space_list. Protected with + fil_system.mutex. */ + fil_space_t *space_list_last_opened= nullptr; + #ifdef __linux__ /** available block devices that reside on non-rotational storage */ std::vector<dev_t> ssd; @@ -1425,7 +1432,8 @@ public: /** nonzero if fil_node_open_file_low() should avoid moving the tablespace to the end of space_list, for FIFO policy of try_to_close() */ ulint freeze_space_list; - /** list of all tablespaces */ + /** List of all file spaces, opened spaces should be at the top of the list + to optimize try_to_close() execution. Protected with fil_system.mutex. */ ilist<fil_space_t, space_list_tag_t> space_list; /** list of all tablespaces for which a FILE_MODIFY record has been written since the latest redo log checkpoint. @@ -1440,6 +1448,49 @@ public: potential space_id reuse */ bool space_id_reuse_warned; + /** Add the file to the end of opened spaces list in + fil_system.space_list, so that fil_space_t::try_to_close() should close + it as a last resort. + @param space space to add */ + void add_opened_last_to_space_list(fil_space_t *space); + + /** Move the file to the end of opened spaces list in + fil_system.space_list, so that fil_space_t::try_to_close() should close + it as a last resort. + @param space space to move */ + inline void move_opened_last_to_space_list(fil_space_t *space) + { + /* In the case when several files of the same space are added in a + row, there is no need to remove and add a space to the same position + in space_list. It can be for system or temporary tablespaces. */ + if (freeze_space_list || space_list_last_opened == space) + return; + + space_list.erase(space_list_t::iterator(space)); + add_opened_last_to_space_list(space); + } + + /** Move closed file last in fil_system.space_list, so that + fil_space_t::try_to_close() iterates opened files first in FIFO order, + i.e. first opened, first closed. + @param space space to move */ + void move_closed_last_to_space_list(fil_space_t *space) + { + if (UNIV_UNLIKELY(freeze_space_list)) + return; + + space_list_t::iterator s= space_list_t::iterator(space); + + if (space_list_last_opened == space) + { + space_list_t::iterator prev= s; + space_list_last_opened= &*--prev; + } + + space_list.erase(s); + space_list.push_back(*space); + } + /** Return the next tablespace from default_encrypt_tables list. @param space previous tablespace (nullptr to start from the start) @param recheck whether the removal condition needs to be rechecked after diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index ccb174da596..b8df6d9f63e 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -342,6 +342,9 @@ public: /** Upgrade U locks on a block to X */ void page_lock_upgrade(const buf_block_t &block); + /** Upgrade index U lock to X */ + ATTRIBUTE_COLD void index_lock_upgrade(); + /** Check if we are holding tablespace latch @param space tablespace to search for @return whether space.latch is being held */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 606d94818cf..b85fa518384 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -85,11 +85,6 @@ struct srv_stats_t /** Count the amount of data written in total (in bytes) */ ulint_ctr_1_t data_written; - - /** Number of buffer pool reads that led to the reading of - a disk page */ - ulint_ctr_1_t buf_pool_reads; - /** Number of bytes saved by page compression */ ulint_ctr_n_t page_compression_saved; /* Number of pages compressed with page compression */ @@ -614,23 +609,11 @@ struct export_var_t{ char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */ my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */ ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */ - ulint innodb_buffer_pool_pages_data; /*!< Data pages */ ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */ - ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */ - ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */ ulint innodb_buffer_pool_pages_misc; /*!< Miscellanous pages */ - ulint innodb_buffer_pool_pages_free; /*!< Free pages */ #ifdef UNIV_DEBUG ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */ #endif /* UNIV_DEBUG */ - ulint innodb_buffer_pool_pages_made_not_young; - ulint innodb_buffer_pool_pages_made_young; - ulint innodb_buffer_pool_pages_old; - ulint innodb_buffer_pool_read_requests; /*!< buf_pool.stat.n_page_gets */ - ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */ - ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */ - ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */ - ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/ ulint innodb_checkpoint_age; ulint innodb_checkpoint_max_age; ulint innodb_data_pending_reads; /*!< Pending reads */ diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 4d231077b12..245b981974b 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -924,14 +924,19 @@ public: /** Determine if the specified transaction or any older one might be active. - @param caller_trx used to get/set pins + @param trx current transaction @param id transaction identifier @return whether any transaction not newer than id might be active */ - bool find_same_or_older(trx_t *caller_trx, trx_id_t id) + bool find_same_or_older(trx_t *trx, trx_id_t id) { - return rw_trx_hash.iterate(caller_trx, find_same_or_older_callback, &id); + if (trx->max_inactive_id >= id) + return false; + bool found= rw_trx_hash.iterate(trx, find_same_or_older_callback, &id); + if (!found) + trx->max_inactive_id= id; + return found; } diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 7d2c3297769..152e794ac6a 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -642,6 +642,10 @@ public: Cleared in commit_in_memory() after commit_state(), trx_sys_t::deregister_rw(), release_locks(). */ trx_id_t id; + /** The largest encountered transaction identifier for which no + transaction was observed to be active. This is a cache to speed up + trx_sys_t::find_same_or_older(). */ + trx_id_t max_inactive_id; private: /** mutex protecting state and some of lock |