summary | refs | log | tree | commit | diff
path: root/storage/innobase/include
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/include')
-rw-r--r-- storage/innobase/include/btr0btr.h | 2
-rw-r--r-- storage/innobase/include/btr0types.h | 3
-rw-r--r-- storage/innobase/include/buf0buf.h | 205
-rw-r--r-- storage/innobase/include/buf0dblwr.h | 69
-rw-r--r-- storage/innobase/include/buf0flu.h | 22
-rw-r--r-- storage/innobase/include/buf0rea.h | 9
-rw-r--r-- storage/innobase/include/fil0fil.h | 57
-rw-r--r-- storage/innobase/include/mtr0mtr.h | 3
-rw-r--r-- storage/innobase/include/srv0srv.h | 17
-rw-r--r-- storage/innobase/include/trx0sys.h | 11
-rw-r--r-- storage/innobase/include/trx0trx.h | 4
11 files changed, 239 insertions, 163 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index a1cc10b05db..a56598d3620 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -445,7 +445,7 @@ Gets the root node of a tree and x- or s-latches it.
buf_block_t*
btr_root_block_get(
/*===============*/
- const dict_index_t* index, /*!< in: index tree */
+ dict_index_t* index, /*!< in: index tree */
rw_lock_type_t mode, /*!< in: either RW_S_LATCH
or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 912c022c64f..fc829e7857a 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -103,6 +103,9 @@ enum btr_latch_mode {
dict_index_t::lock is being held in non-exclusive mode. */
BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF
| BTR_ALREADY_S_LATCHED,
+ /** Attempt to modify records in an x-latched tree. */
+ BTR_MODIFY_TREE_ALREADY_LATCHED = BTR_MODIFY_TREE
+ | BTR_ALREADY_S_LATCHED,
/** U-latch root and X-latch a leaf page, assuming that
dict_index_t::lock is being held in U mode. */
BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index c02180ad9ee..74905638698 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -720,13 +720,14 @@ public:
ut_ad(s < REINIT);
}
- void read_unfix(uint32_t s)
+ uint32_t read_unfix(uint32_t s)
{
ut_ad(lock.is_write_locked());
ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1);
- ut_d(auto old_state=) zip.fix.fetch_add(s - READ_FIX);
+ uint32_t old_state= zip.fix.fetch_add(s - READ_FIX);
ut_ad(old_state >= READ_FIX);
ut_ad(old_state < WRITE_FIX);
+ return old_state + (s - READ_FIX);
}
void set_freed(uint32_t prev_state, uint32_t count= 0)
@@ -777,11 +778,11 @@ public:
it from buf_pool.flush_list */
inline void write_complete(bool temporary);
- /** Write a flushable page to a file. buf_pool.mutex must be held.
- @param lru true=buf_pool.LRU; false=buf_pool.flush_list
+ /** Write a flushable page to a file or free a freeable block.
+ @param evict whether to evict the page on write completion
@param space tablespace
- @return whether the page was flushed and buf_pool.mutex was released */
- inline bool flush(bool lru, fil_space_t *space);
+ @return whether a page write was initiated and buf_pool.mutex released */
+ bool flush(bool evict, fil_space_t *space);
/** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
@@ -851,8 +852,6 @@ public:
/** @return whether the block is mapped to a data file */
bool in_file() const { return state() >= FREED; }
- /** @return whether the block is modified and ready for flushing */
- inline bool ready_for_flush() const;
/** @return whether the block can be relocated in memory.
The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool can_relocate() const;
@@ -1025,10 +1024,10 @@ Compute the hash fold value for blocks in buf_pool.zip_hash. */
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
-/** A "Hazard Pointer" class used to iterate over page lists
-inside the buffer pool. A hazard pointer is a buf_page_t pointer
+/** A "Hazard Pointer" class used to iterate over buf_pool.LRU or
+buf_pool.flush_list. A hazard pointer is a buf_page_t pointer
which we intend to iterate over next and we want it to remain valid
-even after we release the buffer pool mutex. */
+even after we release the mutex that protects the list. */
class HazardPointer
{
public:
@@ -1143,7 +1142,8 @@ struct buf_buddy_free_t {
/*!< Node of zip_free list */
};
-/** @brief The buffer pool statistics structure. */
+/** @brief The buffer pool statistics structure;
+protected by buf_pool.mutex unless otherwise noted. */
struct buf_pool_stat_t{
/** Initialize the counters */
void init() { memset((void*) this, 0, sizeof *this); }
@@ -1152,9 +1152,8 @@ struct buf_pool_stat_t{
/*!< number of page gets performed;
also successful searches through
the adaptive hash index are
- counted as page gets; this field
- is NOT protected by the buffer
- pool mutex */
+ counted as page gets;
+ NOT protected by buf_pool.mutex */
ulint n_pages_read; /*!< number of read operations */
ulint n_pages_written;/*!< number of write operations */
ulint n_pages_created;/*!< number of pages created
@@ -1172,10 +1171,9 @@ struct buf_pool_stat_t{
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
- /** number of waits for eviction; writes protected by buf_pool.mutex */
+ /** number of waits for eviction */
ulint LRU_waits;
ulint LRU_bytes; /*!< LRU size in bytes */
- ulint flush_list_bytes;/*!< flush_list size in bytes */
};
/** Statistics of buddy blocks of a given size. */
@@ -1496,6 +1494,11 @@ public:
n_chunks_new / 4 * chunks->size;
}
+ /** @return whether the buffer pool has run out */
+ TPOOL_SUPPRESS_TSAN
+ bool ran_out() const
+ { return UNIV_UNLIKELY(!try_LRU_scan || !UT_LIST_GET_LEN(free)); }
+
/** @return whether the buffer pool is shrinking */
inline bool is_shrinking() const
{
@@ -1533,17 +1536,10 @@ public:
/** Buffer pool mutex */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
- /** Number of pending LRU flush; protected by mutex. */
- ulint n_flush_LRU_;
- /** broadcast when n_flush_LRU reaches 0; protected by mutex */
- pthread_cond_t done_flush_LRU;
- /** Number of pending flush_list flush; protected by mutex */
- ulint n_flush_list_;
- /** broadcast when n_flush_list reaches 0; protected by mutex */
- pthread_cond_t done_flush_list;
-
- TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; }
- TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; }
+ /** current statistics; protected by mutex */
+ buf_pool_stat_t stat;
+ /** old statistics; protected by mutex */
+ buf_pool_stat_t old_stat;
/** @name General fields */
/* @{ */
@@ -1704,11 +1700,12 @@ public:
buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
/*!< Statistics of buddy system,
indexed by block size */
- buf_pool_stat_t stat; /*!< current statistics */
- buf_pool_stat_t old_stat; /*!< old statistics */
/* @} */
+ /** number of index page splits */
+ Atomic_counter<ulint> pages_split;
+
/** @name Page flushing algorithm fields */
/* @{ */
@@ -1717,7 +1714,10 @@ public:
alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex;
/** "hazard pointer" for flush_list scans; protected by flush_list_mutex */
FlushHp flush_hp;
- /** modified blocks (a subset of LRU) */
+ /** flush_list size in bytes; protected by flush_list_mutex */
+ ulint flush_list_bytes;
+ /** possibly modified persistent pages (a subset of LRU);
+ buf_dblwr.pending_writes() is approximately COUNT(is_write_fixed()) */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
/** number of blocks ever added to flush_list;
sometimes protected by flush_list_mutex */
@@ -1726,28 +1726,70 @@ public:
TPOOL_SUPPRESS_TSAN void add_flush_list_requests(size_t size)
{ ut_ad(size); flush_list_requests+= size; }
private:
- /** whether the page cleaner needs wakeup from indefinite sleep */
- bool page_cleaner_is_idle;
+ static constexpr unsigned PAGE_CLEANER_IDLE= 1;
+ static constexpr unsigned FLUSH_LIST_ACTIVE= 2;
+ static constexpr unsigned LRU_FLUSH= 4;
+
+ /** Number of pending LRU flush * LRU_FLUSH +
+ PAGE_CLEANER_IDLE + FLUSH_LIST_ACTIVE flags */
+ unsigned page_cleaner_status;
/** track server activity count for signaling idle flushing */
ulint last_activity_count;
public:
/** signalled to wake up the page_cleaner; protected by flush_list_mutex */
pthread_cond_t do_flush_list;
+ /** broadcast when !n_flush(); protected by flush_list_mutex */
+ pthread_cond_t done_flush_LRU;
+ /** broadcast when a batch completes; protected by flush_list_mutex */
+ pthread_cond_t done_flush_list;
+
+ /** @return number of pending LRU flush */
+ unsigned n_flush() const
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ return page_cleaner_status / LRU_FLUSH;
+ }
+
+ /** Increment the number of pending LRU flush */
+ inline void n_flush_inc();
+
+ /** Decrement the number of pending LRU flush */
+ inline void n_flush_dec();
+
+ /** @return whether flush_list flushing is active */
+ bool flush_list_active() const
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ return page_cleaner_status & FLUSH_LIST_ACTIVE;
+ }
+
+ void flush_list_set_active()
+ {
+ ut_ad(!flush_list_active());
+ page_cleaner_status+= FLUSH_LIST_ACTIVE;
+ }
+ void flush_list_set_inactive()
+ {
+ ut_ad(flush_list_active());
+ page_cleaner_status-= FLUSH_LIST_ACTIVE;
+ }
/** @return whether the page cleaner must sleep due to being idle */
bool page_cleaner_idle() const noexcept
{
mysql_mutex_assert_owner(&flush_list_mutex);
- return page_cleaner_is_idle;
+ return page_cleaner_status & PAGE_CLEANER_IDLE;
}
- /** Wake up the page cleaner if needed */
- void page_cleaner_wakeup();
+ /** Wake up the page cleaner if needed.
+ @param for_LRU whether to wake up for LRU eviction */
+ void page_cleaner_wakeup(bool for_LRU= false);
/** Register whether an explicit wakeup of the page cleaner is needed */
void page_cleaner_set_idle(bool deep_sleep)
{
mysql_mutex_assert_owner(&flush_list_mutex);
- page_cleaner_is_idle= deep_sleep;
+ page_cleaner_status= (page_cleaner_status & ~PAGE_CLEANER_IDLE) |
+ (PAGE_CLEANER_IDLE * deep_sleep);
}
/** Update server last activity count */
@@ -1757,9 +1799,6 @@ public:
last_activity_count= activity_count;
}
- // n_flush_LRU() + n_flush_list()
- // is approximately COUNT(is_write_fixed()) in flush_list
-
unsigned freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
@@ -1769,16 +1808,10 @@ public:
to read this for heuristic
purposes without holding any
mutex or latch */
- bool try_LRU_scan; /*!< Cleared when an LRU
- scan for free block fails. This
- flag is used to avoid repeated
- scans of LRU list when we know
- that there is no free block
- available in the scan depth for
- eviction. Set whenever
- we flush a batch from the
- buffer pool. Protected by the
- buf_pool.mutex */
+ /** Cleared when buf_LRU_get_free_block() fails.
+ Set whenever the free list grows, along with a broadcast of done_free.
+ Protected by buf_pool.mutex. */
+ Atomic_relaxed<bool> try_LRU_scan;
/* @} */
/** @name LRU replacement algorithm fields */
@@ -1787,7 +1820,8 @@ public:
UT_LIST_BASE_NODE_T(buf_page_t) free;
/*!< base node of the free
block list */
- /** signaled each time when the free list grows; protected by mutex */
+ /** broadcast each time when the free list grows or try_LRU_scan is set;
+ protected by mutex */
pthread_cond_t done_free;
UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
@@ -1847,29 +1881,16 @@ public:
{
if (n_pend_reads)
return true;
- mysql_mutex_lock(&mutex);
- const bool any_pending{n_flush_LRU_ || n_flush_list_};
- mysql_mutex_unlock(&mutex);
+ mysql_mutex_lock(&flush_list_mutex);
+ const bool any_pending= page_cleaner_status > PAGE_CLEANER_IDLE ||
+ buf_dblwr.pending_writes();
+ mysql_mutex_unlock(&flush_list_mutex);
return any_pending;
}
- /** @return total amount of pending I/O */
- ulint io_pending() const
- {
- return n_pend_reads + n_flush_LRU() + n_flush_list();
- }
-private:
- /** Remove a block from the flush list. */
- inline void delete_from_flush_list_low(buf_page_t *bpage) noexcept;
- /** Remove a block from flush_list.
- @param bpage buffer pool page
- @param clear whether to invoke buf_page_t::clear_oldest_modification() */
- void delete_from_flush_list(buf_page_t *bpage, bool clear) noexcept;
-public:
/** Remove a block from flush_list.
@param bpage buffer pool page */
- void delete_from_flush_list(buf_page_t *bpage) noexcept
- { delete_from_flush_list(bpage, true); }
+ void delete_from_flush_list(buf_page_t *bpage) noexcept;
/** Prepare to insert a modified block into flush_list.
@param lsn start LSN of the mini-transaction
@@ -1884,7 +1905,7 @@ public:
lsn_t lsn) noexcept;
/** Free a page whose underlying file page has been freed. */
- inline void release_freed_page(buf_page_t *bpage) noexcept;
+ ATTRIBUTE_COLD void release_freed_page(buf_page_t *bpage) noexcept;
private:
/** Temporary memory for page_compressed and encrypted I/O */
@@ -1895,34 +1916,12 @@ private:
/** array of slots */
buf_tmp_buffer_t *slots;
- void create(ulint n_slots)
- {
- this->n_slots= n_slots;
- slots= static_cast<buf_tmp_buffer_t*>
- (ut_malloc_nokey(n_slots * sizeof *slots));
- memset((void*) slots, 0, n_slots * sizeof *slots);
- }
+ void create(ulint n_slots);
- void close()
- {
- for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
- {
- aligned_free(s->crypt_buf);
- aligned_free(s->comp_buf);
- }
- ut_free(slots);
- slots= nullptr;
- n_slots= 0;
- }
+ void close();
/** Reserve a buffer */
- buf_tmp_buffer_t *reserve()
- {
- for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
- if (s->acquire())
- return s;
- return nullptr;
- }
+ buf_tmp_buffer_t *reserve();
} io_buf;
/** whether resize() is in the critical path */
@@ -2011,7 +2010,10 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn)
/** Clear oldest_modification after removing from buf_pool.flush_list */
inline void buf_page_t::clear_oldest_modification()
{
- mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
+#ifdef SAFE_MUTEX
+ if (oldest_modification() != 2)
+ mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
+#endif /* SAFE_MUTEX */
ut_d(const auto s= state());
ut_ad(s >= REMOVE_HASH);
ut_ad(oldest_modification());
@@ -2023,17 +2025,6 @@ inline void buf_page_t::clear_oldest_modification()
oldest_modification_.store(0, std::memory_order_release);
}
-/** @return whether the block is modified and ready for flushing */
-inline bool buf_page_t::ready_for_flush() const
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
- ut_ad(in_LRU_list);
- const auto s= state();
- ut_a(s >= FREED);
- ut_ad(!fsp_is_system_temporary(id().space()) || oldest_modification() == 2);
- return s < READ_FIX;
-}
-
/** @return whether the block can be relocated in memory.
The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool buf_page_t::can_relocate() const
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index fb9df55504c..d9c9239c0b4 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,9 +54,9 @@ class buf_dblwr_t
};
/** the page number of the first doublewrite block (block_size() pages) */
- page_id_t block1= page_id_t(0, 0);
+ page_id_t block1{0, 0};
/** the page number of the second doublewrite block (block_size() pages) */
- page_id_t block2= page_id_t(0, 0);
+ page_id_t block2{0, 0};
/** mutex protecting the data members below */
mysql_mutex_t mutex;
@@ -72,11 +72,15 @@ class buf_dblwr_t
ulint writes_completed;
/** number of pages written by flush_buffered_writes_completed() */
ulint pages_written;
+ /** condition variable for !writes_pending */
+ pthread_cond_t write_cond;
+ /** number of pending page writes */
+ size_t writes_pending;
slot slots[2];
- slot *active_slot= &slots[0];
+ slot *active_slot;
- /** Initialize the doublewrite buffer data structure.
+ /** Initialise the persistent storage of the doublewrite buffer.
@param header doublewrite page header in the TRX_SYS page */
inline void init(const byte *header);
@@ -84,6 +88,8 @@ class buf_dblwr_t
bool flush_buffered_writes(const ulint size);
public:
+ /** Initialise the doublewrite buffer data structures. */
+ void init();
/** Create or restore the doublewrite buffer in the TRX_SYS page.
@return whether the operation succeeded */
bool create();
@@ -118,7 +124,7 @@ public:
void recover();
/** Update the doublewrite buffer on data page write completion. */
- void write_completed();
+ void write_completed(bool with_doublewrite);
/** Flush possible buffered writes to persistent storage.
It is very important to call this function after a batch of writes has been
posted, and also when we may have to wait for a page latch!
@@ -137,14 +143,14 @@ public:
@param size payload size in bytes */
void add_to_batch(const IORequest &request, size_t size);
- /** Determine whether the doublewrite buffer is initialized */
- bool is_initialised() const
+ /** Determine whether the doublewrite buffer has been created */
+ bool is_created() const
{ return UNIV_LIKELY(block1 != page_id_t(0, 0)); }
/** @return whether a page identifier is part of the doublewrite buffer */
bool is_inside(const page_id_t id) const
{
- if (!is_initialised())
+ if (!is_created())
return false;
ut_ad(block1 < block2);
if (id < block1)
@@ -156,13 +162,44 @@ public:
/** Wait for flush_buffered_writes() to be fully completed */
void wait_flush_buffered_writes()
{
- if (is_initialised())
- {
- mysql_mutex_lock(&mutex);
- while (batch_running)
- my_cond_wait(&cond, &mutex.m_mutex);
- mysql_mutex_unlock(&mutex);
- }
+ mysql_mutex_lock(&mutex);
+ while (batch_running)
+ my_cond_wait(&cond, &mutex.m_mutex);
+ mysql_mutex_unlock(&mutex);
+ }
+
+ /** Register an unbuffered page write */
+ void add_unbuffered()
+ {
+ mysql_mutex_lock(&mutex);
+ writes_pending++;
+ mysql_mutex_unlock(&mutex);
+ }
+
+ size_t pending_writes()
+ {
+ mysql_mutex_lock(&mutex);
+ const size_t pending{writes_pending};
+ mysql_mutex_unlock(&mutex);
+ return pending;
+ }
+
+ /** Wait for writes_pending to reach 0 */
+ void wait_for_page_writes()
+ {
+ mysql_mutex_lock(&mutex);
+ while (writes_pending)
+ my_cond_wait(&write_cond, &mutex.m_mutex);
+ mysql_mutex_unlock(&mutex);
+ }
+
+ /** Wait for writes_pending to reach 0; the result of my_cond_timedwait() is ignored, so the loop does not end at abstime */
+ void wait_for_page_writes(const timespec &abstime)
+ {
+ mysql_mutex_lock(&mutex);
+ while (writes_pending)
+ my_cond_timedwait(&write_cond, &mutex.m_mutex, &abstime);
+ mysql_mutex_unlock(&mutex);
}
};
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index af38f61b13b..31fe4446681 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -30,10 +30,8 @@ Created 11/5/1995 Heikki Tuuri
#include "log0log.h"
#include "buf0buf.h"
-/** Number of pages flushed. Protected by buf_pool.mutex. */
-extern ulint buf_flush_page_count;
/** Number of pages flushed via LRU. Protected by buf_pool.mutex.
-Also included in buf_flush_page_count. */
+Also included in buf_pool.stat.n_pages_written. */
extern ulint buf_lru_flush_page_count;
/** Number of pages freed without flushing. Protected by buf_pool.mutex. */
extern ulint buf_lru_freed_page_count;
@@ -86,15 +84,18 @@ buf_flush_init_for_writing(
bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr)
MY_ATTRIBUTE((warn_unused_result));
-/** Write out dirty blocks from buf_pool.LRU.
+/** Write out dirty blocks from buf_pool.LRU,
+and move clean blocks to buf_pool.free.
+The caller must invoke buf_dblwr.flush_buffered_writes()
+after releasing buf_pool.mutex.
@param max_n wished maximum number of blocks flushed
-@return the number of processed pages
+@param evict whether to evict pages after flushing
+@return evict ? number of processed pages : number of pages written
@retval 0 if a buf_pool.LRU batch is already running */
-ulint buf_flush_LRU(ulint max_n);
+ulint buf_flush_LRU(ulint max_n, bool evict);
-/** Wait until a flush batch ends.
-@param lru true=buf_pool.LRU; false=buf_pool.flush_list */
-void buf_flush_wait_batch_end(bool lru);
+/** Wait until a LRU flush batch ends. */
+void buf_flush_wait_LRU_batch_end();
/** Wait until all persistent pages are flushed up to a limit.
@param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */
ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn);
@@ -106,9 +107,6 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious);
/** Initialize page_cleaner. */
ATTRIBUTE_COLD void buf_flush_page_cleaner_init();
-/** Wait for pending flushes to complete. */
-void buf_flush_wait_batch_end_acquiring_mutex(bool lru);
-
/** Flush the buffer pool on shutdown. */
ATTRIBUTE_COLD void buf_flush_buffer_pool();
diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h
index 986a792b97e..4ec8938c689 100644
--- a/storage/innobase/include/buf0rea.h
+++ b/storage/innobase/include/buf0rea.h
@@ -33,10 +33,11 @@ Created 11/5/1995 Heikki Tuuri
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@retval DB_SUCCESS if the page was read and is not corrupted,
-@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
+@param page_id page id
+@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@retval DB_SUCCESS if the page was read and is not corrupted
+@retval DB_SUCCESS_LOCKED_REC if the page was not read
+@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 940e1b68458..483d594c6b9 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -898,11 +898,13 @@ public:
@param purpose tablespace purpose
@param crypt_data encryption information
@param mode encryption mode
+ @param opened true if space files are opened
@return pointer to created tablespace, to be filled in with add()
@retval nullptr on failure (such as when the same tablespace exists) */
static fil_space_t *create(uint32_t id, uint32_t flags,
fil_type_t purpose, fil_space_crypt_t *crypt_data,
- fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT);
+ fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT,
+ bool opened= false);
MY_ATTRIBUTE((warn_unused_result))
/** Acquire a tablespace reference.
@@ -1107,7 +1109,7 @@ private:
inline bool fil_space_t::use_doublewrite() const
{
return !UT_LIST_GET_FIRST(chain)->atomic_write && srv_use_doublewrite_buf &&
- buf_dblwr.is_initialised();
+ buf_dblwr.is_created();
}
inline void fil_space_t::set_imported()
@@ -1384,6 +1386,11 @@ struct fil_system_t
private:
bool m_initialised;
+
+ /** Points to the last opened space in space_list. Protected by
+ fil_system.mutex. */
+ fil_space_t *space_list_last_opened= nullptr;
+
#ifdef __linux__
/** available block devices that reside on non-rotational storage */
std::vector<dev_t> ssd;
@@ -1425,7 +1432,8 @@ public:
/** nonzero if fil_node_open_file_low() should avoid moving the tablespace
to the end of space_list, for FIFO policy of try_to_close() */
ulint freeze_space_list;
- /** list of all tablespaces */
+ /** List of all file spaces; opened spaces should be at the top of the list
+ to optimize try_to_close() execution. Protected by fil_system.mutex. */
ilist<fil_space_t, space_list_tag_t> space_list;
/** list of all tablespaces for which a FILE_MODIFY record has been written
since the latest redo log checkpoint.
@@ -1440,6 +1448,49 @@ public:
potential space_id reuse */
bool space_id_reuse_warned;
+ /** Add the file to the end of opened spaces list in
+ fil_system.space_list, so that fil_space_t::try_to_close() should close
+ it as a last resort.
+ @param space space to add */
+ void add_opened_last_to_space_list(fil_space_t *space);
+
+ /** Move the file to the end of opened spaces list in
+ fil_system.space_list, so that fil_space_t::try_to_close() should close
+ it as a last resort.
+ @param space space to move */
+ inline void move_opened_last_to_space_list(fil_space_t *space)
+ {
+ /* In the case when several files of the same space are added in a
+ row, there is no need to remove and add a space to the same position
+ in space_list. This can happen for system or temporary tablespaces. */
+ if (freeze_space_list || space_list_last_opened == space)
+ return;
+
+ space_list.erase(space_list_t::iterator(space));
+ add_opened_last_to_space_list(space);
+ }
+
+ /** Move closed file last in fil_system.space_list, so that
+ fil_space_t::try_to_close() iterates opened files first in FIFO order,
+ i.e. first opened, first closed.
+ @param space space to move */
+ void move_closed_last_to_space_list(fil_space_t *space)
+ {
+ if (UNIV_UNLIKELY(freeze_space_list))
+ return;
+
+ space_list_t::iterator s= space_list_t::iterator(space);
+
+ if (space_list_last_opened == space)
+ {
+ space_list_t::iterator prev= s;
+ space_list_last_opened= &*--prev;
+ }
+
+ space_list.erase(s);
+ space_list.push_back(*space);
+ }
+
/** Return the next tablespace from default_encrypt_tables list.
@param space previous tablespace (nullptr to start from the start)
@param recheck whether the removal condition needs to be rechecked after
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index ccb174da596..b8df6d9f63e 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -342,6 +342,9 @@ public:
/** Upgrade U locks on a block to X */
void page_lock_upgrade(const buf_block_t &block);
+ /** Upgrade index U lock to X */
+ ATTRIBUTE_COLD void index_lock_upgrade();
+
/** Check if we are holding tablespace latch
@param space tablespace to search for
@return whether space.latch is being held */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 606d94818cf..b85fa518384 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -85,11 +85,6 @@ struct srv_stats_t
/** Count the amount of data written in total (in bytes) */
ulint_ctr_1_t data_written;
-
- /** Number of buffer pool reads that led to the reading of
- a disk page */
- ulint_ctr_1_t buf_pool_reads;
-
/** Number of bytes saved by page compression */
ulint_ctr_n_t page_compression_saved;
/* Number of pages compressed with page compression */
@@ -614,23 +609,11 @@ struct export_var_t{
char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */
my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
- ulint innodb_buffer_pool_pages_data; /*!< Data pages */
ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
- ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
- ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */
ulint innodb_buffer_pool_pages_misc; /*!< Miscellaneous pages */
- ulint innodb_buffer_pool_pages_free; /*!< Free pages */
#ifdef UNIV_DEBUG
ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
#endif /* UNIV_DEBUG */
- ulint innodb_buffer_pool_pages_made_not_young;
- ulint innodb_buffer_pool_pages_made_young;
- ulint innodb_buffer_pool_pages_old;
- ulint innodb_buffer_pool_read_requests; /*!< buf_pool.stat.n_page_gets */
- ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
- ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
- ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
- ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
ulint innodb_checkpoint_age;
ulint innodb_checkpoint_max_age;
ulint innodb_data_pending_reads; /*!< Pending reads */
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 4d231077b12..245b981974b 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -924,14 +924,19 @@ public:
/**
Determine if the specified transaction or any older one might be active.
- @param caller_trx used to get/set pins
+ @param trx current transaction
@param id transaction identifier
@return whether any transaction not newer than id might be active
*/
- bool find_same_or_older(trx_t *caller_trx, trx_id_t id)
+ bool find_same_or_older(trx_t *trx, trx_id_t id)
{
- return rw_trx_hash.iterate(caller_trx, find_same_or_older_callback, &id);
+ if (trx->max_inactive_id >= id)
+ return false;
+ bool found= rw_trx_hash.iterate(trx, find_same_or_older_callback, &id);
+ if (!found)
+ trx->max_inactive_id= id;
+ return found;
}
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 7d2c3297769..152e794ac6a 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -642,6 +642,10 @@ public:
Cleared in commit_in_memory() after commit_state(),
trx_sys_t::deregister_rw(), release_locks(). */
trx_id_t id;
+ /** The largest encountered transaction identifier for which no
+ transaction was observed to be active. This is a cache to speed up
+ trx_sys_t::find_same_or_older(). */
+ trx_id_t max_inactive_id;
private:
/** mutex protecting state and some of lock