Diffstat (limited to 'storage/innobase/include/buf0buf.h')
 storage/innobase/include/buf0buf.h | 1366 ++++++++++--------------
 1 file changed, 547 insertions(+), 819 deletions(-)
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index a84ea047a54..2b4732a64a0 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -24,40 +24,30 @@ The database buffer pool high-level routines
Created 11/5/1995 Heikki Tuuri
*******************************************************/
-#ifndef buf0buf_h
-#define buf0buf_h
+#pragma once
/** Magic value to use instead of checksums when they are disabled */
#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
#include "fil0fil.h"
#include "mtr0types.h"
-#include "buf0types.h"
#include "span.h"
#include "assume_aligned.h"
+#include "buf0types.h"
#ifndef UNIV_INNOCHECKSUM
-#include "hash0hash.h"
#include "ut0byte.h"
#include "page0types.h"
#include "log0log.h"
#include "srv0srv.h"
+#include "transactional_lock_guard.h"
#include <ostream>
-// Forward declaration
-struct fil_addr_t;
-
/** @name Modes for buf_page_get_gen */
/* @{ */
#define BUF_GET 10 /*!< get always */
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
the block young in the LRU list */
-#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
- set no latch; we have
- separated this case, because
- it is error-prone programming
- not to set a latch, and it
- should be used with care */
#define BUF_GET_IF_IN_POOL_OR_WATCH 15
/*!< Get the page only if it's in the
buffer pool, if not then set a watch
@@ -65,7 +55,6 @@ struct fil_addr_t;
#define BUF_GET_POSSIBLY_FREED 16
/*!< Like BUF_GET, but do not mind
if the file page has been freed. */
-#define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */
/* @} */
/** If LRU list of a buf_pool is less than this size then LRU eviction
@@ -74,22 +63,6 @@ the blocks on free list. If LRU list is very small then we can end up
in thrashing. */
#define BUF_LRU_MIN_LEN 256
-/** buf_page_t::state() values, distinguishing buf_page_t and buf_block_t */
-enum buf_page_state
-{
- /** available in buf_pool.free or buf_pool.watch */
- BUF_BLOCK_NOT_USED,
- /** allocated for something else than a file page */
- BUF_BLOCK_MEMORY,
- /** a previously allocated file page, in transit to NOT_USED */
- BUF_BLOCK_REMOVE_HASH,
- /** a buf_block_t that is also in buf_pool.LRU */
- BUF_BLOCK_FILE_PAGE,
- /** the buf_page_t of a ROW_FORMAT=COMPRESSED page
- whose uncompressed page frame has been evicted */
- BUF_BLOCK_ZIP_PAGE
-};
-
/** This structure defines information we will fetch from each buffer pool. It
will be used to print table IO stats */
struct buf_pool_info_t
@@ -170,33 +143,10 @@ operator<<(
const page_id_t page_id);
#ifndef UNIV_INNOCHECKSUM
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void);
-/*========================*/
-
-/********************************************************************//**
-Allocates a buf_page_t descriptor. This function must succeed. In case
-of failure we assert in this function. */
-UNIV_INLINE
-buf_page_t*
-buf_page_alloc_descriptor(void)
-/*===========================*/
- MY_ATTRIBUTE((malloc));
-/********************************************************************//**
-Free a buf_page_t descriptor. */
-UNIV_INLINE
-void
-buf_page_free_descriptor(
-/*=====================*/
- buf_page_t* bpage) /*!< in: bpage descriptor to free. */
- MY_ATTRIBUTE((nonnull));
+# define buf_pool_get_curr_size() srv_buf_pool_curr_size
/** Allocate a buffer block.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
+@return own: the allocated block, state()==MEMORY */
inline buf_block_t *buf_block_alloc();
/********************************************************************//**
Frees a buffer block which does not contain a file page. */
@@ -206,71 +156,37 @@ buf_block_free(
/*===========*/
buf_block_t* block); /*!< in, own: block to be freed */
-/**************************************************************//**
-NOTE! The following macros should be used instead of buf_page_get_gen,
-to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
-in LA! */
#define buf_page_get(ID, SIZE, LA, MTR) \
- buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR)
-
-/**************************************************************//**
-Use these macros to bufferfix a page with no latching. Remember not to
-read the contents of the page unless you know it is safe. Do not modify
-the contents of the page! We have separated this case, because it is
-error-prone programming not to set a latch, and it should be used
-with care. */
-#define buf_page_get_with_no_latch(ID, SIZE, MTR) \
- buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \
- __FILE__, __LINE__, MTR)
-/********************************************************************//**
-This is the general function used to get optimistic access to a database
-page.
-@return TRUE if success */
-ibool
-buf_page_optimistic_get(
-/*====================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: guessed block */
- ib_uint64_t modify_clock,/*!< in: modify clock value */
- const char* file, /*!< in: file name */
- unsigned line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-/** Given a tablespace id and page number tries to get that page. If the
-page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the lock_sys_t::mutex.
-@param[in] page_id page id
-@param[in] file file name
-@param[in] line line where called
-@param[in] mtr mini-transaction
-@return pointer to a page or NULL */
-buf_block_t*
-buf_page_try_get_func(
- const page_id_t page_id,
- const char* file,
- unsigned line,
- mtr_t* mtr);
-
-/** Tries to get a page.
-If the page is not in the buffer pool it is not loaded. Suitable for using
-when holding the lock_sys_t::mutex.
+ buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, MTR)
+
+/** Try to acquire a page latch.
+@param rw_latch RW_S_LATCH or RW_X_LATCH
+@param block guessed block
+@param modify_clock expected value of block->modify_clock
+@param mtr mini-transaction
+@return whether the latch was acquired (the page is an allocated file page) */
+bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
+ uint64_t modify_clock, mtr_t *mtr);
+
+/** Try to S-latch a page.
+Suitable for use when holding lock_sys latches (as it avoids deadlocks).
@param[in] page_id page identifier
-@param[in] mtr mini-transaction
-@return the page if in buffer pool, NULL if not */
-#define buf_page_try_get(page_id, mtr) \
- buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr);
+@param[in,out] mtr mini-transaction
+@return the block
+@retval nullptr if an S-latch cannot be granted immediately */
+buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr);
/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
-The page must be released with buf_page_release_zip().
+The page must be released with unfix().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
-@param[in] page_id page id
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size
-@return pointer to the block */
-buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size);
+@param page_id page identifier
+@param zip_size ROW_FORMAT=COMPRESSED page size in bytes
+@return pointer to the block, s-latched */
+buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size);
/** Get access to a database page. Buffered redo log may be applied.
@param[in] page_id page id
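
A hedged usage sketch of the optimistic latching pattern that
buf_page_optimistic_get() implements (illustrative only; `block` and
`saved_modify_clock` are assumed to have been saved earlier while the
page was latched, e.g. via buf_block_get_modify_clock()):

  mtr_t mtr;
  mtr.start();
  if (buf_page_optimistic_get(RW_S_LATCH, block, saved_modify_clock, &mtr))
  {
    /* The guess was right: the block still maps to the same file page
    and was not modified in between; read it under the S-latch. */
  }
  else
  {
    /* The page may have been evicted or modified: fall back to a
    pessimistic lookup via buf_page_get_gen(). */
  }
  mtr.commit();
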
@@ -278,10 +194,8 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size);
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
-BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
-@param[in] file file name
-@param[in] line line where called
-@param[in] mtr mini-transaction
+BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in,out] mtr mini-transaction
@param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge while
reading the pages from file.
@@ -293,11 +207,10 @@ buf_page_get_gen(
ulint rw_latch,
buf_block_t* guess,
ulint mode,
- const char* file,
- unsigned line,
mtr_t* mtr,
dberr_t* err = NULL,
- bool allow_ibuf_merge = false);
+ bool allow_ibuf_merge = false)
+ MY_ATTRIBUTE((nonnull(6), warn_unused_result));
/** This is the low level function used to get access to a database page.
@param[in] page_id page id
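
A hedged sketch of a typical call after this change (the __FILE__ and
__LINE__ arguments are gone; `page_id` is assumed known to the caller):

  dberr_t err;
  mtr_t mtr;
  mtr.start();
  buf_block_t *block= buf_page_get_gen(page_id, 0 /* uncompressed */,
                                       RW_S_LATCH, nullptr, BUF_GET,
                                       &mtr, &err);
  if (block)
  {
    /* the frame remains S-latched until mtr.commit() */
  }
  else
  {
    /* err was set, e.g. DB_PAGE_CORRUPTED or DB_DECRYPTION_FAILED */
  }
  mtr.commit();
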
@@ -305,10 +218,9 @@ buf_page_get_gen(
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
-BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
-@param[in] file file name
-@param[in] line line where called
-@param[in] mtr mini-transaction
+BUF_PEEK_IF_IN_POOL, or BUF_GET_IF_IN_POOL_OR_WATCH
+@param[in,out] mtr mini-transaction, or NULL if a
+ block with page_id is to be evicted
@param[out] err DB_SUCCESS or error code
@param[in] allow_ibuf_merge Allow change buffer merge to happen
while reading the page from file
@@ -322,16 +234,14 @@ buf_page_get_low(
ulint rw_latch,
buf_block_t* guess,
ulint mode,
- const char* file,
- unsigned line,
mtr_t* mtr,
dberr_t* err,
bool allow_ibuf_merge);
/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_get_gen).
+of the functions that perform a block state transition NOT_USED => LRU
+(the other is buf_page_get_low()).
@param[in,out] space space object
@param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@@ -342,52 +252,24 @@ buf_block_t*
buf_page_create(fil_space_t *space, uint32_t offset,
ulint zip_size, mtr_t *mtr, buf_block_t *free_block);
-/********************************************************************//**
-Releases a compressed-only page acquired with buf_page_get_zip(). */
-UNIV_INLINE
-void
-buf_page_release_zip(
-/*=================*/
- buf_page_t* bpage); /*!< in: buffer block */
-/********************************************************************//**
-Releases a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release_latch(
-/*=====================*/
- buf_block_t* block, /*!< in: buffer block */
- ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
- RW_NO_LATCH */
+/** Initialize a page in the buffer pool while initializing a
+deferred tablespace.
+@param space_id	space identifier
+@param zip_size ROW_FORMAT=COMPRESSED page size or 0
+@param mtr mini-transaction
+@param free_block pre-allocated buffer block
+@return pointer to the block, page bufferfixed */
+buf_block_t*
+buf_page_create_deferred(uint32_t space_id, ulint zip_size, mtr_t *mtr,
+ buf_block_t *free_block);
+
/** Move a block to the start of the LRU list. */
void buf_page_make_young(buf_page_t *bpage);
-/** Mark the page status as FREED for the given tablespace id and
-page number. If the page is not in buffer pool then ignore it.
+/** Mark the page status as FREED for the given tablespace and page number.
@param[in,out] space tablespace
@param[in] page page number
-@param[in,out] mtr mini-transaction
-@param[in] file file name
-@param[in] line line where called */
-void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr,
- const char *file, unsigned line);
-
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-unsigned
-buf_page_get_freed_page_clock(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: block */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-unsigned
-buf_block_get_freed_page_clock(
-/*===========================*/
- const buf_block_t* block) /*!< in: block */
- MY_ATTRIBUTE((warn_unused_result));
+@param[in,out] mtr mini-transaction */
+void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr);
/** Determine if a block is still close enough to the MRU end of the LRU list
meaning that it is not in danger of getting evicted and also implying
@@ -431,32 +313,6 @@ ib_uint64_t
buf_block_get_modify_clock(
/*=======================*/
buf_block_t* block); /*!< in: block */
-/*******************************************************************//**
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc_func(
-/*=======================*/
-# ifdef UNIV_DEBUG
- const char* file, /*!< in: file name */
- unsigned line, /*!< in: line */
-# endif /* UNIV_DEBUG */
- buf_block_t* block) /*!< in/out: block to bufferfix */
- MY_ATTRIBUTE((nonnull));
-
-# ifdef UNIV_DEBUG
-/** Increments the bufferfix count.
-@param[in,out] b block to bufferfix
-@param[in] f file name where requested
-@param[in] l line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-# else /* UNIV_DEBUG */
-/** Increments the bufferfix count.
-@param[in,out] b block to bufferfix
-@param[in] f file name where requested
-@param[in] l line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-# endif /* UNIV_DEBUG */
#endif /* !UNIV_INNOCHECKSUM */
/** Check if a buffer is all zeroes.
@@ -464,42 +320,6 @@ buf_block_buf_fix_inc_func(
@return whether the buffer is all zeroes */
bool buf_is_zeroes(st_::span<const byte> buf);
-/** Checks if the page is in crc32 checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in crc32 checksum format. */
-bool
-buf_page_is_checksum_valid_crc32(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
-/** Checks if the page is in innodb checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in innodb checksum format. */
-bool
-buf_page_is_checksum_valid_innodb(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
-/** Checks if the page is in none checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in none checksum format. */
-bool
-buf_page_is_checksum_valid_none(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
/** Check if a page is corrupt.
@param[in] check_lsn whether the LSN should be checked
@param[in] read_buf database page
@@ -512,27 +332,6 @@ buf_page_is_corrupted(
ulint fsp_flags)
MY_ATTRIBUTE((warn_unused_result));
-inline void *aligned_malloc(size_t size, size_t align)
-{
-#ifdef _MSC_VER
- return _aligned_malloc(size, align);
-#else
- void *result;
- if (posix_memalign(&result, align, size))
- result= NULL;
- return result;
-#endif
-}
-
-inline void aligned_free(void *ptr)
-{
-#ifdef _MSC_VER
- _aligned_free(ptr);
-#else
- free(ptr);
-#endif
-}
-
/** Read the key version from the page. In full crc32 format,
key version is stored at {0-3th} bytes. In other format, it is
stored in 26th position.
@@ -631,35 +430,7 @@ void buf_pool_invalidate();
--------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. */
-UNIV_INLINE
-void
-buf_block_dbg_add_level(
-/*====================*/
- buf_block_t* block, /*!< in: buffer page
- where we have acquired latch */
- latch_level_t level); /*!< in: latching order level */
-#else /* UNIV_DEBUG */
-# define buf_block_dbg_add_level(block, level) /* nothing */
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((warn_unused_result));
-#else /* UNIV_DEBUG */
-# define buf_block_get_frame(block) (block)->frame
-#endif /* UNIV_DEBUG */
+#define buf_block_get_frame(block) (block)->page.frame
/*********************************************************************//**
Gets the compressed page descriptor corresponding to an uncompressed page
@@ -672,18 +443,8 @@ if applicable. */
/** Monitor the buffer page read/write activity, and increment corresponding
counter value in MONITOR_MODULE_BUF_PAGE.
@param bpage buffer page whose read or write was completed
-@param io_type BUF_IO_READ or BUF_IO_WRITE */
-ATTRIBUTE_COLD __attribute__((nonnull))
-void buf_page_monitor(const buf_page_t *bpage, buf_io_fix io_type);
-
-/** Complete a read request of a file page to buf_pool.
-@param bpage recently read page
-@param node data file
-@return whether the operation succeeded
-@retval DB_SUCCESS always when writing, or if a read page was OK
-@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
-@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */
-dberr_t buf_page_read_complete(buf_page_t *bpage, const fil_node_t &node);
+@param read true=read, false=write */
+ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read);
/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
if needed.
@@ -752,17 +513,17 @@ class buf_page_t
{
friend buf_pool_t;
friend buf_block_t;
+
/** @name General fields */
/* @{ */
public: // FIXME: fix fil_iterate()
- /** Page id. Protected by buf_pool.hash_lock_get(id) when
+ /** Page id. Protected by buf_pool.page_hash.lock_get() when
the page is in buf_pool.page_hash. */
page_id_t id_;
+ /** buf_pool.page_hash link; protected by buf_pool.page_hash.lock_get() */
+ buf_page_t *hash;
private:
- /** Count of how manyfold this block is currently bufferfixed. */
- Atomic_counter<uint32_t> buf_fix_count_;
-
/** log sequence number of the START of the log entry written of the
oldest modification to this block which has not yet been written
to the data file;
@@ -773,53 +534,64 @@ private:
(because id().space() is the temporary tablespace). */
Atomic_relaxed<lsn_t> oldest_modification_;
- /** type of pending I/O operation; protected by buf_pool.mutex
- if in_LRU_list */
- Atomic_relaxed<buf_io_fix> io_fix_;
- /** Block state. @see in_file().
- State transitions between in_file() states and to
- BUF_BLOCK_REMOVE_HASH are protected by buf_pool.hash_lock_get(id)
- when the block is in buf_pool.page_hash.
- Other transitions when in_LRU_list are protected by buf_pool.mutex. */
- buf_page_state state_;
-
public:
- /** buf_pool.page_hash link; protected by buf_pool.hash_lock_get(id) */
- buf_page_t *hash;
+ /** state() of unused block (in buf_pool.free list) */
+ static constexpr uint32_t NOT_USED= 0;
+ /** state() of block allocated as general-purpose memory */
+ static constexpr uint32_t MEMORY= 1;
+ /** state() of block that is being freed */
+ static constexpr uint32_t REMOVE_HASH= 2;
+ /** smallest state() of a buffer page that is freed in the tablespace */
+ static constexpr uint32_t FREED= 3;
+ /** smallest state() for a block that belongs to buf_pool.LRU */
+ static constexpr uint32_t UNFIXED= 1U << 29;
+ /** smallest state() of a block for which buffered changes may exist */
+ static constexpr uint32_t IBUF_EXIST= 2U << 29;
+ /** smallest state() of a (re)initialized page (no doublewrite needed) */
+ static constexpr uint32_t REINIT= 3U << 29;
+ /** smallest state() for an io-fixed block */
+ static constexpr uint32_t READ_FIX= 4U << 29;
+ /** smallest state() for a write-fixed block */
+ static constexpr uint32_t WRITE_FIX= 5U << 29;
+ /** smallest state() for a write-fixed block with buffered changes */
+ static constexpr uint32_t WRITE_FIX_IBUF= 6U << 29;
+ /** smallest state() for a write-fixed block (no doublewrite was used) */
+ static constexpr uint32_t WRITE_FIX_REINIT= 7U << 29;
+ /** buf_pool.LRU status mask in state() */
+ static constexpr uint32_t LRU_MASK= 7U << 29;
+
+ /** lock covering the contents of frame */
+ block_lock lock;
+ /** pointer to aligned, uncompressed page frame of innodb_page_size */
+ byte *frame;
/* @} */
- page_zip_des_t zip; /*!< compressed page; zip.data
- (but not the data it points to) is
- also protected by buf_pool.mutex;
- state == BUF_BLOCK_ZIP_PAGE and
- zip.data == NULL means an active
- buf_pool.watch */
-
- buf_tmp_buffer_t* slot; /*!< Slot for temporary memory
- used for encryption/compression
- or NULL */
+ /** ROW_FORMAT=COMPRESSED page; zip.data (but not the data it points to)
+ is also protected by buf_pool.mutex;
+ !frame && !zip.data means an active buf_pool.watch */
+ page_zip_des_t zip;
#ifdef UNIV_DEBUG
/** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */
bool in_zip_hash;
- /** whether this->LRU is in buf_pool.LRU (in_file() holds);
+ /** whether this->LRU is in buf_pool.LRU (in_file());
protected by buf_pool.mutex */
bool in_LRU_list;
- /** whether this is in buf_pool.page_hash (in_file() holds);
+ /** whether this is in buf_pool.page_hash (in_file());
protected by buf_pool.mutex */
bool in_page_hash;
- /** whether this->list is in buf_pool.free (state() == BUF_BLOCK_NOT_USED);
+ /** whether this->list is in buf_pool.free (state() == NOT_USED);
protected by buf_pool.flush_list_mutex */
bool in_free_list;
#endif /* UNIV_DEBUG */
/** list member in one of the lists of buf_pool; protected by
buf_pool.mutex or buf_pool.flush_list_mutex
- state() == BUF_BLOCK_NOT_USED: buf_pool.free or buf_pool.withdraw
+ state() == NOT_USED: buf_pool.free or buf_pool.withdraw
in_file() && oldest_modification():
buf_pool.flush_list (protected by buf_pool.flush_list_mutex)
The contents is undefined if in_file() && !oldest_modification(),
- or if state() is BUF_BLOCK_MEMORY or BUF_BLOCK_REMOVE_HASH. */
+ or if state() == MEMORY or state() == REMOVE_HASH. */
UT_LIST_NODE_T(buf_page_t) list;
/** @name LRU replacement algorithm fields.
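
For orientation, a hedged illustration (not part of the header) of how
these constants pack the entire page state into one 32-bit word: the top
three bits select the LRU/io-fix category and the low bits carry the
buffer-fix count, matching buf_fix_count() below:

  void describe_state(const buf_page_t &bpage)
  {
    const uint32_t s= bpage.state();
    if (s < buf_page_t::FREED)
      { /* NOT_USED, MEMORY or REMOVE_HASH: not a file page */ }
    else if (s < buf_page_t::UNFIXED)
      { /* freed file page; buffer-fix count is s - FREED */ }
    else
      { /* file page; category is s & LRU_MASK (UNFIXED, IBUF_EXIST,
        REINIT, READ_FIX, ...); fix count is s & ~LRU_MASK */ }
  }
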
@@ -843,7 +615,7 @@ public:
0 if the block was never accessed
in the buffer pool.
- For state==BUF_BLOCK_MEMORY
+ For state() == MEMORY
blocks, this field can be repurposed
for something else.
@@ -851,89 +623,127 @@ public:
and bytes allocated for recv_sys.pages,
the field is protected by
recv_sys_t::mutex. */
- /** Change buffer entries for the page exist.
- Protected by io_fix()==BUF_IO_READ or by buf_block_t::lock. */
- bool ibuf_exist;
-
- /** Block initialization status. Can be modified while holding io_fix()
- or buf_block_t::lock X-latch */
- enum {
- /** the page was read normally and should be flushed normally */
- NORMAL = 0,
- /** the page was (re)initialized, and the doublewrite buffer can be
- skipped on the next flush */
- INIT_ON_FLUSH,
- /** the page was freed and need to be flushed.
- For page_compressed, page flush will punch a hole to free space.
- Else if innodb_immediate_scrub_data_uncompressed, the page will
- be overwritten with zeroes. */
- FREED
- } status;
-
- buf_page_t() : id_(0)
+ buf_page_t() : id_{0}
{
- static_assert(BUF_BLOCK_NOT_USED == 0, "compatibility");
+ static_assert(NOT_USED == 0, "compatibility");
memset((void*) this, 0, sizeof *this);
}
- /** Initialize some fields */
- void init()
+ buf_page_t(const buf_page_t &b) :
+ id_(b.id_), hash(b.hash),
+ oldest_modification_(b.oldest_modification_),
+ lock() /* not copied */,
+ frame(b.frame), zip(b.zip),
+#ifdef UNIV_DEBUG
+ in_zip_hash(b.in_zip_hash), in_LRU_list(b.in_LRU_list),
+ in_page_hash(b.in_page_hash), in_free_list(b.in_free_list),
+#endif /* UNIV_DEBUG */
+ list(b.list), LRU(b.LRU), old(b.old), freed_page_clock(b.freed_page_clock),
+ access_time(b.access_time)
{
- io_fix_= BUF_IO_NONE;
- buf_fix_count_= 0;
- old= 0;
- freed_page_clock= 0;
- access_time= 0;
+ lock.init();
+ }
+
+ /** Initialize some more fields */
+ void init(uint32_t state, page_id_t id)
+ {
+ ut_ad(state < REMOVE_HASH || state >= UNFIXED);
+ id_= id;
+ zip.fix= state;
oldest_modification_= 0;
- slot= nullptr;
- ibuf_exist= false;
- status= NORMAL;
+ lock.init();
ut_d(in_zip_hash= false);
ut_d(in_free_list= false);
ut_d(in_LRU_list= false);
ut_d(in_page_hash= false);
- HASH_INVALIDATE(this, hash);
+ old= 0;
+ freed_page_clock= 0;
+ access_time= 0;
}
- /** Initialize some more fields */
- void init(buf_page_state state, page_id_t id, uint32_t buf_fix_count= 0)
+public:
+ const page_id_t &id() const { return id_; }
+ uint32_t state() const { return zip.fix; }
+ uint32_t buf_fix_count() const
{
- init();
- state_= state;
- id_= id;
- buf_fix_count_= buf_fix_count;
+ uint32_t f= state();
+ ut_ad(f >= FREED);
+ return f < UNFIXED ? (f - FREED) : (~LRU_MASK & f);
}
+ /** @return whether this block is read or write fixed;
+ read_complete() or write_complete() will always release
+ the io-fix before releasing U-lock or X-lock */
+ bool is_io_fixed() const
+ { const auto s= state(); ut_ad(s >= FREED); return s >= READ_FIX; }
+ /** @return whether this block is write fixed;
+ write_complete() will always release the write-fix before releasing U-lock */
+ bool is_write_fixed() const { return state() >= WRITE_FIX; }
+ /** @return whether this block is read fixed; this should never hold
+ when a thread is holding the block lock in any mode */
+ bool is_read_fixed() const { return is_io_fixed() && !is_write_fixed(); }
- /** Initialize some more fields */
- void init(page_id_t id, uint32_t buf_fix_count= 0)
+ /** @return if this belongs to buf_pool.unzip_LRU */
+ bool belongs_to_unzip_LRU() const
+ { return UNIV_LIKELY_NULL(zip.data) && frame; }
+
+ bool is_freed() const
+ { const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; }
+ bool is_ibuf_exist() const
{
- init();
- id_= id;
- buf_fix_count_= buf_fix_count;
+ const auto s= state();
+ ut_ad(s >= UNFIXED);
+ ut_ad(s < READ_FIX);
+ return (s & LRU_MASK) == IBUF_EXIST;
}
+ bool is_reinit() const { return !(~state() & REINIT); }
-public:
- const page_id_t &id() const { return id_; }
- buf_page_state state() const { return state_; }
- uint32_t buf_fix_count() const { return buf_fix_count_; }
- buf_io_fix io_fix() const { return io_fix_; }
- void io_unfix()
+ void set_reinit(uint32_t prev_state)
{
- ut_d(const auto old_io_fix= io_fix());
- ut_ad(old_io_fix == BUF_IO_READ || old_io_fix == BUF_IO_PIN);
- io_fix_= BUF_IO_NONE;
+ ut_ad(prev_state < READ_FIX);
+ ut_d(const auto s=) zip.fix.fetch_add(REINIT - prev_state);
+ ut_ad(s > prev_state);
+ ut_ad(s < prev_state + UNFIXED);
}
- /** @return if this belongs to buf_pool.unzip_LRU */
- bool belongs_to_unzip_LRU() const
+ void set_ibuf_exist()
+ {
+ ut_ad(lock.is_write_locked());
+ ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
+ const auto s= state();
+ ut_ad(s >= UNFIXED);
+ ut_ad(s < READ_FIX);
+ ut_ad(s < IBUF_EXIST || s >= REINIT);
+ zip.fix.fetch_add(IBUF_EXIST - (LRU_MASK & s));
+ }
+ void clear_ibuf_exist()
+ {
+ ut_ad(lock.is_write_locked());
+ ut_ad(id() < page_id_t(SRV_SPACE_ID_UPPER_BOUND, 0));
+ ut_d(const auto s=) zip.fix.fetch_sub(IBUF_EXIST - UNFIXED);
+ ut_ad(s >= IBUF_EXIST);
+ ut_ad(s < REINIT);
+ }
+
+ uint32_t read_unfix(uint32_t s)
{
- return zip.data && state() != BUF_BLOCK_ZIP_PAGE;
+ ut_ad(lock.is_write_locked());
+ ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1);
+ uint32_t old_state= zip.fix.fetch_add(s - READ_FIX);
+ ut_ad(old_state >= READ_FIX);
+ ut_ad(old_state < WRITE_FIX);
+ return old_state + (s - READ_FIX);
}
- inline void add_buf_fix_count(uint32_t count);
- inline void set_buf_fix_count(uint32_t count);
- inline void set_state(buf_page_state state);
- inline void set_io_fix(buf_io_fix io_fix);
+ void set_freed(uint32_t prev_state, uint32_t count= 0)
+ {
+ ut_ad(lock.is_write_locked());
+ ut_ad(prev_state >= UNFIXED);
+ ut_ad(prev_state < READ_FIX);
+ ut_d(auto s=) zip.fix.fetch_sub((prev_state & LRU_MASK) - FREED - count);
+ ut_ad(!((prev_state ^ s) & LRU_MASK));
+ }
+
+ inline void set_state(uint32_t s);
inline void set_corrupt_id();
/** @return the log sequence number of the oldest pending modification
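
The fetch_add()/fetch_sub() deltas above switch only the three category
bits while preserving the fix count; a self-contained check of that
arithmetic, using local mirrors of the constants (illustration only):

  constexpr uint32_t UNFIXED= 1U << 29, IBUF_EXIST= 2U << 29,
    REINIT= 3U << 29;
  /* set_reinit() on a page holding 3 bufferfixes, no buffered changes */
  static_assert((UNFIXED + 3) + (REINIT - UNFIXED) == REINIT + 3, "");
  /* clear_ibuf_exist() undoes set_ibuf_exist(), keeping the fix count */
  static_assert((IBUF_EXIST + 3) - (IBUF_EXIST - UNFIXED) == UNFIXED + 3, "");
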
@@ -953,35 +763,72 @@ public:
inline void set_oldest_modification(lsn_t lsn);
/** Clear oldest_modification after removing from buf_pool.flush_list */
inline void clear_oldest_modification();
+ /** Reset the oldest_modification when marking a persistent page freed */
+ void reset_oldest_modification()
+ {
+ ut_ad(oldest_modification() > 2);
+ oldest_modification_.store(1, std::memory_order_release);
+ }
+
+ /** Complete a read of a page.
+ @param node data file
+ @return whether the operation succeeded
+ @retval DB_PAGE_CORRUPTED if the checksum fails
+ @retval DB_DECRYPTION_FAILED if the page cannot be decrypted
+ @retval DB_FAIL if the page contains the wrong ID */
+ dberr_t read_complete(const fil_node_t &node);
+
/** Note that a block is no longer dirty, while not removing
it from buf_pool.flush_list */
- inline void clear_oldest_modification(bool temporary);
+ inline void write_complete(bool temporary);
+
+ /** Write a flushable page to a file or free a freeable block.
+ @param evict whether to evict the page on write completion
+ @param space tablespace
+ @return whether a page write was initiated and buf_pool.mutex released */
+ bool flush(bool evict, fil_space_t *space);
/** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
{
ut_ad(fsp_is_system_temporary(id().space()));
- ut_ad(state() == BUF_BLOCK_FILE_PAGE);
- ut_ad(!oldest_modification());
+ ut_ad(in_file());
+ ut_ad((oldest_modification() | 2) == 2);
oldest_modification_= 2;
}
/** Prepare to release a file page to buf_pool.free. */
void free_file_page()
{
- ut_ad(state() == BUF_BLOCK_REMOVE_HASH);
+ ut_ad((zip.fix.fetch_sub(REMOVE_HASH - MEMORY)) == REMOVE_HASH);
/* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */
ut_d(oldest_modification_= 0;)
- set_corrupt_id();
- ut_d(set_state(BUF_BLOCK_MEMORY));
+ id_= page_id_t(~0ULL);
+ }
+
+ void fix_on_recovery()
+ {
+ ut_d(const auto f=) zip.fix.fetch_sub(READ_FIX - UNFIXED - 1);
+ ut_ad(f >= READ_FIX);
+ ut_ad(f < WRITE_FIX);
+ }
+
+ uint32_t fix(uint32_t count= 1)
+ {
+ ut_ad(count);
+ ut_ad(count < IBUF_EXIST);
+ uint32_t f= zip.fix.fetch_add(count);
+ ut_ad(f >= FREED);
+ ut_ad(!((f ^ (f + 1)) & LRU_MASK));
+ return f;
}
- void fix() { buf_fix_count_++; }
uint32_t unfix()
{
- uint32_t count= buf_fix_count_--;
- ut_ad(count != 0);
- return count - 1;
+ uint32_t f= zip.fix.fetch_sub(1);
+ ut_ad(f > FREED);
+ ut_ad(!((f ^ (f - 1)) & LRU_MASK));
+ return f - 1;
}
/** @return the physical size, in bytes */
@@ -1007,27 +854,8 @@ public:
}
/** @return whether the block is mapped to a data file */
- bool in_file() const
- {
- switch (state_) {
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_FILE_PAGE:
- return true;
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- return false;
- }
+ bool in_file() const { return state() >= FREED; }
- ut_error;
- return false;
- }
-
- /** @return whether the block is modified and ready for flushing */
- inline bool ready_for_flush() const;
- /** @return whether the state can be changed to BUF_BLOCK_NOT_USED */
- bool ready_for_replace() const
- { return !oldest_modification() && can_relocate(); }
/** @return whether the block can be relocated in memory.
The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool can_relocate() const;
@@ -1059,27 +887,18 @@ struct buf_block_t{
be the first field, so that
buf_pool.page_hash can point
to buf_page_t or buf_block_t */
- byte* frame; /*!< pointer to buffer frame which
- is of size srv_page_size, and
- aligned to an address divisible by
- srv_page_size */
- rw_lock_t lock; /*!< read-write lock of the buffer
- frame */
#ifdef UNIV_DEBUG
/** whether page.list is in buf_pool.withdraw
- ((state() == BUF_BLOCK_NOT_USED)) and the buffer pool is being shrunk;
+ ((state() == NOT_USED)) and the buffer pool is being shrunk;
protected by buf_pool.mutex */
bool in_withdraw_list;
/** whether unzip_LRU is in buf_pool.unzip_LRU
- (state() == BUF_BLOCK_FILE_PAGE and zip.data != nullptr);
+ (in_file() && frame && zip.data);
protected by buf_pool.mutex */
bool in_unzip_LRU_list;
#endif
- UT_LIST_NODE_T(buf_block_t) unzip_LRU;
- /*!< node of the decompressed LRU list;
- a block is in the unzip_LRU list
- if page.state() == BUF_BLOCK_FILE_PAGE
- and page.zip.data != NULL */
+ /** member of buf_pool.unzip_LRU (if belongs_to_unzip_LRU()) */
+ UT_LIST_NODE_T(buf_block_t) unzip_LRU;
/* @} */
/** @name Optimistic search field */
/* @{ */
@@ -1118,17 +937,15 @@ struct buf_block_t{
These 5 fields may only be modified when:
we are holding the appropriate x-latch in btr_search_latches[], and
one of the following holds:
- (1) the block state is BUF_BLOCK_FILE_PAGE, and
- we are holding an s-latch or x-latch on buf_block_t::lock, or
- (2) buf_block_t::buf_fix_count == 0, or
- (3) the block state is BUF_BLOCK_REMOVE_HASH.
+ (1) in_file(), and we are holding lock in any mode, or
+ (2) !is_read_fixed()&&(state()>=UNFIXED||state()==REMOVE_HASH).
An exception to this is when we init or create a page
in the buffer pool in buf0buf.cc.
Another exception for buf_pool_t::clear_hash_index() is that
assigning block->index = NULL (and block->n_pointers = 0)
- is allowed whenever btr_search_own_all(RW_LOCK_X).
+ is allowed whenever all AHI latches are exclusively locked.
Another exception is that ha_insert_for_fold() may
decrement n_pointers without holding the appropriate latch
@@ -1137,8 +954,8 @@ struct buf_block_t{
This implies that the fields may be read without race
condition whenever any of the following hold:
- - the btr_search_latches[] s-latch or x-latch is being held, or
- - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
+ - the btr_search_sys.partition[].latch is being held, or
+ - state() == NOT_USED || state() == MEMORY,
and holding some latch prevents the state from changing to that.
Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
@@ -1152,9 +969,7 @@ struct buf_block_t{
Atomic_counter<ulint>
n_pointers; /*!< used in debugging: the number of
pointers in the adaptive hash index
- pointing to this frame;
- protected by atomic memory access
- or btr_search_own_all(). */
+ pointing to this frame */
# define assert_block_ahi_empty(block) \
ut_a((block)->n_pointers == 0)
# define assert_block_ahi_empty_on_init(block) do { \
@@ -1188,24 +1003,8 @@ struct buf_block_t{
# define assert_block_ahi_empty_on_init(block) /* nothing */
# define assert_block_ahi_valid(block) /* nothing */
#endif /* BTR_CUR_HASH_ADAPT */
-# ifdef UNIV_DEBUG
- /** @name Debug fields */
- /* @{ */
- rw_lock_t* debug_latch; /*!< in the debug version, each thread
- which bufferfixes the block acquires
- an s-latch here; so we can use the
- debug utilities in sync0rw */
- /* @} */
-# endif
void fix() { page.fix(); }
- uint32_t unfix()
- {
- ut_ad(page.buf_fix_count() || page.io_fix() != BUF_IO_NONE ||
- page.state() == BUF_BLOCK_ZIP_PAGE ||
- !rw_lock_own_flagged(&lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S |
- RW_LOCK_FLAG_SX));
- return page.unfix();
- }
+ uint32_t unfix() { return page.unfix(); }
/** @return the physical size, in bytes */
ulint physical_size() const { return page.physical_size(); }
@@ -1217,22 +1016,22 @@ struct buf_block_t{
/** Initialize the block.
@param page_id page identifier
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
- @param fix initial buf_fix_count() */
- void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0);
+ @param state initial state() */
+ void initialise(const page_id_t page_id, ulint zip_size, uint32_t state);
};
/**********************************************************************//**
Compute the hash fold value for blocks in buf_pool.zip_hash. */
/* @{ */
#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift)
-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
+#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->page.frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
-/** A "Hazard Pointer" class used to iterate over page lists
-inside the buffer pool. A hazard pointer is a buf_page_t pointer
+/** A "Hazard Pointer" class used to iterate over buf_pool.LRU or
+buf_pool.flush_list. A hazard pointer is a buf_page_t pointer
which we intend to iterate over next and we want it remain valid
-even after we release the buffer pool mutex. */
+even after we release the mutex that protects the list. */
class HazardPointer
{
public:
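
A hedged sketch of the iteration pattern this class enables, assuming
its set()/get() accessors: publish the next victim before releasing the
list mutex, so that a concurrent thread removing that page can advance
the pointer on our behalf:

  mysql_mutex_lock(&buf_pool.flush_list_mutex);
  for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; )
  {
    buf_pool.flush_hp.set(UT_LIST_GET_PREV(list, bpage));
    mysql_mutex_unlock(&buf_pool.flush_list_mutex);
    /* ... work on bpage without holding flush_list_mutex ... */
    mysql_mutex_lock(&buf_pool.flush_list_mutex);
    bpage= buf_pool.flush_hp.get(); /* possibly adjusted by others */
  }
  mysql_mutex_unlock(&buf_pool.flush_list_mutex);
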
@@ -1347,14 +1146,18 @@ struct buf_buddy_free_t {
/*!< Node of zip_free list */
};
-/** @brief The buffer pool statistics structure. */
+/** @brief The buffer pool statistics structure;
+protected by buf_pool.mutex unless otherwise noted. */
struct buf_pool_stat_t{
- ulint n_page_gets; /*!< number of page gets performed;
+ /** Initialize the counters */
+ void init() { memset((void*) this, 0, sizeof *this); }
+
+ ib_counter_t<ulint, ib_counter_element_t> n_page_gets;
+ /*!< number of page gets performed;
also successful searches through
the adaptive hash index are
- counted as page gets; this field
- is NOT protected by the buffer
- pool mutex */
+ counted as page gets;
+ NOT protected by buf_pool.mutex */
ulint n_pages_read; /*!< number read operations */
ulint n_pages_written;/*!< number write operations */
ulint n_pages_created;/*!< number of pages created
@@ -1372,10 +1175,9 @@ struct buf_pool_stat_t{
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
- /** number of waits for eviction; writes protected by buf_pool.mutex */
+ /** number of waits for eviction */
ulint LRU_waits;
ulint LRU_bytes; /*!< LRU size in bytes */
- ulint flush_list_bytes;/*!< flush_list size in bytes */
};
/** Statistics of buddy blocks of a given size. */
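
Hedged note: n_page_gets is now an ib_counter_t, a counter sharded
across cache lines (see ut0counter.h), so the hot path can increment it
without holding buf_pool.mutex, and a read sums the shards
(approximately). Assuming that interface:

  buf_pool.stat.n_page_gets.add(1);             /* lock-free increment */
  const ulint gets= buf_pool.stat.n_page_gets;  /* sums all shards */
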
@@ -1415,7 +1217,7 @@ class buf_pool_t
size_t mem_size() const { return mem_pfx.m_size; }
/** Register the chunk */
- void reg() { map_reg->emplace(map::value_type(blocks->frame, this)); }
+ void reg() { map_reg->emplace(map::value_type(blocks->page.frame, this)); }
/** Allocate a chunk of buffer frames.
@param bytes requested size
@@ -1442,7 +1244,14 @@ class buf_pool_t
inline const buf_block_t *not_freed() const;
#endif /* UNIV_DEBUG */
};
-
+public:
+ /** Hash cell chain in page_hash_table */
+ struct hash_chain
+ {
+ /** pointer to the first block */
+ buf_page_t *first;
+ };
+private:
/** Withdraw blocks from the buffer pool until meeting withdraw_target.
@return whether retry is needed */
inline bool withdraw_blocks();
@@ -1494,27 +1303,27 @@ public:
{
ut_ad(is_initialised());
size_t size= 0;
- for (auto j= n_chunks; j--; )
+ for (auto j= ut_min(n_chunks_new, n_chunks); j--; )
size+= chunks[j].size;
return size;
}
/** Determine whether a frame is intended to be withdrawn during resize().
- @param ptr pointer within a buf_block_t::frame
+ @param ptr pointer within a buf_page_t::frame
@return whether the frame will be withdrawn */
bool will_be_withdrawn(const byte *ptr) const
{
- ut_ad(curr_size < old_size);
+ ut_ad(n_chunks_new < n_chunks);
#ifdef SAFE_MUTEX
- if (resizing.load(std::memory_order_relaxed))
+ if (resize_in_progress())
mysql_mutex_assert_owner(&mutex);
#endif /* SAFE_MUTEX */
for (const chunk_t *chunk= chunks + n_chunks_new,
* const echunk= chunks + n_chunks;
chunk != echunk; chunk++)
- if (ptr >= chunk->blocks->frame &&
- ptr < (chunk->blocks + chunk->size - 1)->frame + srv_page_size)
+ if (ptr >= chunk->blocks->page.frame &&
+ ptr < (chunk->blocks + chunk->size - 1)->page.frame + srv_page_size)
return true;
return false;
}
@@ -1524,9 +1333,9 @@ public:
@return whether the frame will be withdrawn */
bool will_be_withdrawn(const buf_page_t &bpage) const
{
- ut_ad(curr_size < old_size);
+ ut_ad(n_chunks_new < n_chunks);
#ifdef SAFE_MUTEX
- if (resizing.load(std::memory_order_relaxed))
+ if (resize_in_progress())
mysql_mutex_assert_owner(&mutex);
#endif /* SAFE_MUTEX */
@@ -1540,8 +1349,9 @@ public:
}
/** Release and evict a corrupted page.
- @param bpage page that was being read */
- ATTRIBUTE_COLD void corrupted_evict(buf_page_t *bpage);
+ @param bpage x-latched page that was found corrupted
+ @param state expected current state of the page */
+ ATTRIBUTE_COLD void corrupted_evict(buf_page_t *bpage, uint32_t state);
/** Release a memory block to the buffer pool. */
ATTRIBUTE_COLD void free_block(buf_block_t *block);
@@ -1576,9 +1386,6 @@ public:
inline buf_block_t *block_from_ahi(const byte *ptr) const;
#endif /* BTR_CUR_HASH_ADAPT */
- bool is_block_lock(const rw_lock_t *l) const
- { return is_block_field(static_cast<const void*>(l)); }
-
/**
@return the smallest oldest_modification lsn for any page
@retval empty_lsn if all modified persistent pages have been flushed */
@@ -1607,84 +1414,27 @@ public:
return is_block_field(reinterpret_cast<const void*>(block));
}
- /** Get the page_hash latch for a page */
- page_hash_latch *hash_lock_get(const page_id_t id) const
- {
- return page_hash.lock_get(id.fold());
- }
-
- /** Look up a block descriptor.
- @param id page identifier
- @param fold id.fold()
- @return block descriptor, possibly in watch[]
- @retval nullptr if not found*/
- buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold)
- {
- ut_ad(id.fold() == fold);
-#ifdef SAFE_MUTEX
- DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
- page_hash.lock_get(fold)->is_locked());
-#endif /* SAFE_MUTEX */
- buf_page_t *bpage;
- /* Look for the page in the hash table */
- HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage,
- ut_ad(bpage->in_page_hash), id == bpage->id());
- return bpage;
- }
-private:
- /** Look up a block descriptor.
- @tparam exclusive whether the latch is to be acquired exclusively
- @tparam watch whether to allow watch_is_sentinel()
- @param page_id page identifier
- @param fold page_id.fold()
- @param hash_lock pointer to the acquired latch (to be released by caller)
- @return pointer to the block
- @retval nullptr if no block was found; !lock || !*lock will also hold */
- template<bool exclusive,bool watch>
- buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
- page_hash_latch **hash_lock)
+public:
+ /** @return whether the buffer pool contains a page
+ @tparam allow_watch whether to allow watch_is_sentinel()
+ @param page_id page identifier
+ @param chain hash table chain for page_id.fold() */
+ template<bool allow_watch= false>
+ TRANSACTIONAL_INLINE
+ bool page_hash_contains(const page_id_t page_id, hash_chain &chain)
{
- ut_ad(hash_lock || !exclusive);
- page_hash_latch *latch= page_hash.lock<exclusive>(fold);
- buf_page_t *bpage= page_hash_get_low(page_id, fold);
- if (!bpage || watch_is_sentinel(*bpage))
+ transactional_shared_lock_guard<page_hash_latch> g
+ {page_hash.lock_get(chain)};
+ buf_page_t *bpage= page_hash.get(page_id, chain);
+ if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)])
{
- latch->release<exclusive>();
- if (hash_lock)
- *hash_lock= nullptr;
- return watch ? bpage : nullptr;
+ ut_ad(!bpage->in_zip_hash);
+ ut_ad(!bpage->zip.data);
+ if (!allow_watch)
+ bpage= nullptr;
}
-
- ut_ad(bpage->in_file());
- ut_ad(page_id == bpage->id());
-
- if (hash_lock)
- *hash_lock= latch; /* to be released by the caller */
- else
- latch->release<exclusive>();
return bpage;
}
-public:
- /** Look up a block descriptor.
- @tparam exclusive whether the latch is to be acquired exclusively
- @param page_id page identifier
- @param fold page_id.fold()
- @param hash_lock pointer to the acquired latch (to be released by caller)
- @return pointer to the block
- @retval nullptr if no block was found; !lock || !*lock will also hold */
- template<bool exclusive>
- buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
- page_hash_latch **hash_lock)
- { return page_hash_get_locked<exclusive,false>(page_id, fold, hash_lock); }
-
- /** @return whether the buffer pool contains a page
- @tparam watch whether to allow watch_is_sentinel()
- @param page_id page identifier */
- template<bool watch= false>
- bool page_hash_contains(const page_id_t page_id)
- {
- return page_hash_get_locked<false,watch>(page_id, page_id.fold(), nullptr);
- }
/** Determine if a block is a sentinel for a buffer pool watch.
@param bpage page descriptor
@@ -1693,17 +1443,12 @@ public:
{
#ifdef SAFE_MUTEX
DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
- hash_lock_get(bpage.id())->is_locked());
+ page_hash.lock_get(page_hash.cell_get(bpage.id().fold())).
+ is_locked());
#endif /* SAFE_MUTEX */
ut_ad(bpage.in_file());
-
- if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)])
- {
- ut_ad(bpage.state() != BUF_BLOCK_ZIP_PAGE || bpage.zip.data);
+ if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)])
return false;
- }
-
- ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE);
ut_ad(!bpage.in_zip_hash);
ut_ad(!bpage.zip.data);
return true;
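
The lookup protocol visible in page_hash_contains() applies generally:
compute the cell once, latch only that cell's covering latch, then walk
the chain. A hedged sketch of a read-only lookup for some page_id_t id:

  buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold());
  {
    transactional_shared_lock_guard<page_hash_latch> g
      {buf_pool.page_hash.lock_get(chain)};
    if (buf_page_t *bpage= buf_pool.page_hash.get(id, chain))
    {
      /* bpage->id() == id; bpage->fix() would pin it beyond the guard */
    }
  }
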
@@ -1713,44 +1458,55 @@ public:
This may only be called after !watch_set() and before invoking watch_unset().
@param id page identifier
@return whether the page was read to the buffer pool */
+ TRANSACTIONAL_INLINE
bool watch_occurred(const page_id_t id)
{
- const ulint fold= id.fold();
- page_hash_latch *hash_lock= page_hash.lock<false>(fold);
+ hash_chain &chain= page_hash.cell_get(id.fold());
+ transactional_shared_lock_guard<page_hash_latch> g
+ {page_hash.lock_get(chain)};
/* The page must exist because watch_set() increments buf_fix_count. */
- buf_page_t *bpage= page_hash_get_low(id, fold);
- const bool is_sentinel= watch_is_sentinel(*bpage);
- hash_lock->read_unlock();
- return !is_sentinel;
+ return !watch_is_sentinel(*page_hash.get(id, chain));
}
- /** Register a watch for a page identifier. The caller must hold an
- exclusive page hash latch. The *hash_lock may be released,
- relocated, and reacquired.
+ /** Register a watch for a page identifier.
@param id page identifier
- @param hash_lock exclusively held page_hash latch
- @return a buffer pool block corresponding to id
- @retval nullptr if the block was not present, and a watch was installed */
- inline buf_page_t *watch_set(const page_id_t id,
- page_hash_latch **hash_lock);
+ @param chain page_hash.cell_get(id.fold())
+ @return a buffer page corresponding to id
+ @retval nullptr if the block was not present in page_hash */
+ buf_page_t *watch_set(const page_id_t id, hash_chain &chain);
/** Stop watching whether a page has been read in.
watch_set(id) must have returned nullptr before.
- @param id page identifier */
- void watch_unset(const page_id_t id);
+ @param id page identifier
+ @param chain unlocked hash table chain */
+ void watch_unset(const page_id_t id, hash_chain &chain);
/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
- @param watch sentinel */
- inline void watch_remove(buf_page_t *watch);
+ @param w sentinel
+ @param chain locked hash table chain
+ @return w->state() */
+ inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain);
/** @return whether less than 1/4 of the buffer pool is available */
+ TPOOL_SUPPRESS_TSAN
bool running_out() const
{
return !recv_recovery_is_on() &&
- UNIV_UNLIKELY(UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
- std::min(curr_size, old_size) / 4);
+ UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
+ n_chunks_new / 4 * chunks->size;
+ }
+
+ /** @return whether the buffer pool has run out */
+ TPOOL_SUPPRESS_TSAN
+ bool ran_out() const
+ { return UNIV_UNLIKELY(!try_LRU_scan || !UT_LIST_GET_LEN(free)); }
+
+ /** @return whether the buffer pool is shrinking */
+ inline bool is_shrinking() const
+ {
+ return n_chunks_new < n_chunks;
}
#ifdef UNIV_DEBUG
@@ -1783,18 +1539,11 @@ public:
static constexpr uint32_t READ_AHEAD_PAGES= 64;
/** Buffer pool mutex */
- MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
- /** Number of pending LRU flush; protected by mutex. */
- ulint n_flush_LRU_;
- /** broadcast when n_flush_LRU reaches 0; protected by mutex */
- pthread_cond_t done_flush_LRU;
- /** Number of pending flush_list flush; protected by mutex */
- ulint n_flush_list_;
- /** broadcast when n_flush_list reaches 0; protected by mutex */
- pthread_cond_t done_flush_list;
-
- TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; }
- TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; }
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
+ /** current statistics; protected by mutex */
+ buf_pool_stat_t stat;
+ /** old statistics; protected by mutex */
+ buf_pool_stat_t old_stat;
/** @name General fields */
/* @{ */
@@ -1809,30 +1558,35 @@ public:
ut_allocator<unsigned char> allocator; /*!< Allocator used for
					allocating memory for the "chunks"
member. */
- volatile ulint n_chunks; /*!< number of buffer pool chunks */
- volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */
+ ulint n_chunks; /*!< number of buffer pool chunks */
+	ulint		n_chunks_new;	/*!< new number of buffer pool chunks;
+					both n_chunks and n_chunks_new are
+					protected by buf_pool.mutex */
chunk_t* chunks; /*!< buffer pool chunks */
chunk_t* chunks_old; /*!< old buffer pool chunks to be freed
after resizing buffer pool */
/** current pool size in pages */
Atomic_counter<ulint> curr_size;
- /** previous pool size in pages */
- Atomic_counter<ulint> old_size;
/** read-ahead request size in pages */
Atomic_counter<uint32_t> read_ahead_area;
- /** Hash table with singly-linked overflow lists. @see hash_table_t */
+ /** Hash table with singly-linked overflow lists */
struct page_hash_table
{
+ static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "less than 64 bytes");
+ static_assert(!(CPU_LEVEL1_DCACHE_LINESIZE & 63),
+ "not a multiple of 64 bytes");
+
/** Number of array[] elements per page_hash_latch.
Must be one less than a power of 2. */
- static constexpr size_t ELEMENTS_PER_LATCH= CPU_LEVEL1_DCACHE_LINESIZE /
- sizeof(void*) - 1;
+ static constexpr size_t ELEMENTS_PER_LATCH= 64 / sizeof(void*) - 1;
+ static constexpr size_t EMPTY_SLOTS_PER_LATCH=
+ ((CPU_LEVEL1_DCACHE_LINESIZE / 64) - 1) * (64 / sizeof(void*));
/** number of payload elements in array[] */
Atomic_relaxed<ulint> n_cells;
/** the hash table, with pad(n_cells) elements, aligned to L1 cache size */
- hash_cell_t *array;
+ hash_chain *array;
/** Create the hash table.
@param n the lower bound of n_cells */
@@ -1844,7 +1598,12 @@ public:
/** @return the index of an array element */
ulint calc_hash(ulint fold) const { return calc_hash(fold, n_cells); }
/** @return raw array index converted to padded index */
- static ulint pad(ulint h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }
+ static ulint pad(ulint h)
+ {
+ ulint latches= h / ELEMENTS_PER_LATCH;
+ ulint empty_slots= latches * EMPTY_SLOTS_PER_LATCH;
+ return 1 + latches + empty_slots + h;
+ }
private:
/** @return the hash value before any ELEMENTS_PER_LATCH padding */
static ulint hash(ulint fold, ulint n) { return ut_hash_ulint(fold, n); }
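
A self-contained re-derivation of the padding arithmetic, assuming
64-byte cache lines and 8-byte pointers (so ELEMENTS_PER_LATCH == 7 and
EMPTY_SLOTS_PER_LATCH == 0); illustration only:

  constexpr size_t elements_per_latch= 64 / sizeof(void*) - 1;  /* 7 */
  constexpr size_t pad_example(size_t h)
  { return 1 + h / elements_per_latch + h; }
  static_assert(pad_example(0) == 1, "slot 0 of each line is a latch");
  static_assert(pad_example(6) == 7, "last payload slot of first line");
  static_assert(pad_example(7) == 9, "skips the second latch at slot 8");
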
@@ -1854,29 +1613,72 @@ public:
{
return pad(hash(fold, n_cells));
}
- /** Get a page_hash latch. */
- page_hash_latch *lock_get(ulint fold, ulint n) const
+ public:
+ /** @return the latch covering a hash table chain */
+ static page_hash_latch &lock_get(hash_chain &chain)
{
static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
"must be one less than a power of 2");
- return reinterpret_cast<page_hash_latch*>
- (&array[calc_hash(fold, n) & ~ELEMENTS_PER_LATCH]);
+ const size_t addr= reinterpret_cast<size_t>(&chain);
+ ut_ad(addr & (ELEMENTS_PER_LATCH * sizeof chain));
+ return *reinterpret_cast<page_hash_latch*>
+ (addr & ~(ELEMENTS_PER_LATCH * sizeof chain));
}
- public:
- /** Get a page_hash latch. */
- page_hash_latch *lock_get(ulint fold) const
- { return lock_get(fold, n_cells); }
-
- /** Acquire an array latch.
- @tparam exclusive whether the latch is to be acquired exclusively
- @param fold hash bucket key */
- template<bool exclusive> page_hash_latch *lock(ulint fold)
+
+ /** Get a hash table slot. */
+ hash_chain &cell_get(ulint fold) const
+ { return array[calc_hash(fold, n_cells)]; }
+
+ /** Append a block descriptor to a hash bucket chain. */
+ void append(hash_chain &chain, buf_page_t *bpage)
+ {
+ ut_ad(!bpage->in_page_hash);
+ ut_ad(!bpage->hash);
+ ut_d(bpage->in_page_hash= true);
+ buf_page_t **prev= &chain.first;
+ while (*prev)
+ {
+ ut_ad((*prev)->in_page_hash);
+ prev= &(*prev)->hash;
+ }
+ *prev= bpage;
+ }
+
+ /** Remove a block descriptor from a hash bucket chain. */
+ void remove(hash_chain &chain, buf_page_t *bpage)
{
- page_hash_latch *latch= lock_get(fold, n_cells);
- latch->acquire<exclusive>();
- return latch;
+ ut_ad(bpage->in_page_hash);
+ buf_page_t **prev= &chain.first;
+ while (*prev != bpage)
+ {
+ ut_ad((*prev)->in_page_hash);
+ prev= &(*prev)->hash;
+ }
+ *prev= bpage->hash;
+ ut_d(bpage->in_page_hash= false);
+ bpage->hash= nullptr;
}
+ /** Replace a block descriptor with another. */
+ void replace(hash_chain &chain, buf_page_t *old, buf_page_t *bpage)
+ {
+ ut_ad(old->in_page_hash);
+ ut_ad(bpage->in_page_hash);
+ ut_d(old->in_page_hash= false);
+ ut_ad(bpage->hash == old->hash);
+ old->hash= nullptr;
+ buf_page_t **prev= &chain.first;
+ while (*prev != old)
+ {
+ ut_ad((*prev)->in_page_hash);
+ prev= &(*prev)->hash;
+ }
+ *prev= bpage;
+ }
+
+ /** Look up a page in a hash bucket chain. */
+ inline buf_page_t *get(const page_id_t id, const hash_chain &chain) const;
+
    /** Exclusively acquire all latches */
inline void write_lock_all();
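
Correspondingly, lock_get() needs no table lookup: each latch occupies
the 64-byte-aligned slot 0 of its cache line, so clearing the low
address bits of any chain in that line yields its covering latch. A
worked mask example under the same 8-byte-pointer assumption:

  constexpr size_t mask= 7 * sizeof(void*);  /* 56 = 0b111000 */
  static_assert(((64 + 8)  & ~mask) == 64, "chain in slot 1 -> latch");
  static_assert(((64 + 56) & ~mask) == 64, "chain in slot 7 -> same latch");
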
@@ -1891,8 +1693,6 @@ public:
/** map of block->frame to buf_block_t blocks that belong
to buf_buddy_alloc(); protected by buf_pool.mutex */
hash_table_t zip_hash;
- /** number of pending read operations */
- Atomic_counter<ulint> n_pend_reads;
Atomic_counter<ulint>
n_pend_unzip; /*!< number of pending decompressions */
@@ -1902,44 +1702,90 @@ public:
buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
/*!< Statistics of buddy system,
indexed by block size */
- buf_pool_stat_t stat; /*!< current statistics */
- buf_pool_stat_t old_stat; /*!< old statistics */
/* @} */
+ /** number of index page splits */
+ Atomic_counter<ulint> pages_split;
+
/** @name Page flushing algorithm fields */
/* @{ */
/** mutex protecting flush_list, buf_page_t::set_oldest_modification()
and buf_page_t::list pointers when !oldest_modification() */
- MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex;
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex;
/** "hazard pointer" for flush_list scans; protected by flush_list_mutex */
FlushHp flush_hp;
- /** modified blocks (a subset of LRU) */
+ /** flush_list size in bytes; protected by flush_list_mutex */
+ ulint flush_list_bytes;
+ /** possibly modified persistent pages (a subset of LRU);
+ os_aio_pending_writes() is approximately COUNT(is_write_fixed()) */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
private:
- /** whether the page cleaner needs wakeup from indefinite sleep */
- bool page_cleaner_is_idle;
+ static constexpr unsigned PAGE_CLEANER_IDLE= 1;
+ static constexpr unsigned FLUSH_LIST_ACTIVE= 2;
+ static constexpr unsigned LRU_FLUSH= 4;
+
+ /** Number of pending LRU flush * LRU_FLUSH +
+ PAGE_CLEANER_IDLE + FLUSH_LIST_ACTIVE flags */
+ unsigned page_cleaner_status;
/** track server activity count for signaling idle flushing */
ulint last_activity_count;
public:
/** signalled to wake up the page_cleaner; protected by flush_list_mutex */
pthread_cond_t do_flush_list;
+ /** broadcast when !n_flush(); protected by flush_list_mutex */
+ pthread_cond_t done_flush_LRU;
+ /** broadcast when a batch completes; protected by flush_list_mutex */
+ pthread_cond_t done_flush_list;
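+
+  /* A waiter for pending LRU flushes follows the usual condition
+  variable protocol; a sketch, assuming the my_cond_wait() wrapper:
+
+    mysql_mutex_lock(&buf_pool.flush_list_mutex);
+    while (buf_pool.n_flush())
+      my_cond_wait(&buf_pool.done_flush_LRU,
+                   &buf_pool.flush_list_mutex.m_mutex);
+    mysql_mutex_unlock(&buf_pool.flush_list_mutex);
+  */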
+
+  /** @return the number of pending LRU flushes */
+ unsigned n_flush() const
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ return page_cleaner_status / LRU_FLUSH;
+ }
+
+  /** Increment the number of pending LRU flushes */
+  inline void n_flush_inc();
+
+  /** Decrement the number of pending LRU flushes */
+  inline void n_flush_dec();
+
+ /** @return whether flush_list flushing is active */
+ bool flush_list_active() const
+ {
+ mysql_mutex_assert_owner(&flush_list_mutex);
+ return page_cleaner_status & FLUSH_LIST_ACTIVE;
+ }
+
+ void flush_list_set_active()
+ {
+ ut_ad(!flush_list_active());
+ page_cleaner_status+= FLUSH_LIST_ACTIVE;
+ }
+ void flush_list_set_inactive()
+ {
+ ut_ad(flush_list_active());
+ page_cleaner_status-= FLUSH_LIST_ACTIVE;
+ }
/** @return whether the page cleaner must sleep due to being idle */
bool page_cleaner_idle() const
{
mysql_mutex_assert_owner(&flush_list_mutex);
- return page_cleaner_is_idle;
+ return page_cleaner_status & PAGE_CLEANER_IDLE;
}
- /** Wake up the page cleaner if needed */
- void page_cleaner_wakeup();
+ /** Wake up the page cleaner if needed.
+ @param for_LRU whether to wake up for LRU eviction */
+ void page_cleaner_wakeup(bool for_LRU= false);
/** Register whether an explicit wakeup of the page cleaner is needed */
void page_cleaner_set_idle(bool deep_sleep)
{
mysql_mutex_assert_owner(&flush_list_mutex);
- page_cleaner_is_idle= deep_sleep;
+ page_cleaner_status= (page_cleaner_status & ~PAGE_CLEANER_IDLE) |
+ (PAGE_CLEANER_IDLE * deep_sleep);
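+    /* The AND clears the idle flag; the addition sets it again if and
+    only if deep_sleep holds. The LRU flush counter and the
+    FLUSH_LIST_ACTIVE flag are left untouched. */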
}
/** Update server last activity count */
@@ -1949,9 +1795,6 @@ public:
last_activity_count= activity_count;
}
- // n_flush_LRU() + n_flush_list()
- // is approximately COUNT(io_fix()==BUF_IO_WRITE) in flush_list
-
unsigned freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
@@ -1961,16 +1804,10 @@ public:
to read this for heuristic
purposes without holding any
mutex or latch */
- bool try_LRU_scan; /*!< Cleared when an LRU
- scan for free block fails. This
- flag is used to avoid repeated
- scans of LRU list when we know
- that there is no free block
- available in the scan depth for
- eviction. Set whenever
- we flush a batch from the
- buffer pool. Protected by the
- buf_pool.mutex */
+ /** Cleared when buf_LRU_get_free_block() fails.
+ Set whenever the free list grows, along with a broadcast of done_free.
+ Protected by buf_pool.mutex. */
+ Atomic_relaxed<bool> try_LRU_scan;
/* @} */
/** @name LRU replacement algorithm fields */
@@ -1979,7 +1816,8 @@ public:
UT_LIST_BASE_NODE_T(buf_page_t) free;
/*!< base node of the free
block list */
- /** signaled each time when the free list grows; protected by mutex */
+  /** broadcast each time the free list grows or try_LRU_scan is set;
+  protected by mutex */
pthread_cond_t done_free;
UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
@@ -2034,34 +1872,13 @@ public:
/** Reserve a buffer. */
buf_tmp_buffer_t *io_buf_reserve() { return io_buf.reserve(); }
- /** @return whether any I/O is pending */
- bool any_io_pending()
- {
- if (n_pend_reads)
- return true;
- mysql_mutex_lock(&mutex);
- const bool any_pending{n_flush_LRU_ || n_flush_list_};
- mysql_mutex_unlock(&mutex);
- return any_pending;
- }
- /** @return total amount of pending I/O */
- ulint io_pending() const
- {
- return n_pend_reads + n_flush_LRU() + n_flush_list();
- }
-
private:
/** Remove a block from the flush list. */
inline void delete_from_flush_list_low(buf_page_t *bpage);
- /** Remove a block from flush_list.
- @param bpage buffer pool page
- @param clear whether to invoke buf_page_t::clear_oldest_modification() */
- void delete_from_flush_list(buf_page_t *bpage, bool clear);
public:
/** Remove a block from flush_list.
@param bpage buffer pool page */
- void delete_from_flush_list(buf_page_t *bpage)
- { delete_from_flush_list(bpage, true); }
+ void delete_from_flush_list(buf_page_t *bpage);
/** Insert a modified block into the flush list.
@param block modified block
@@ -2069,7 +1886,7 @@ public:
void insert_into_flush_list(buf_block_t *block, lsn_t lsn);
/** Free a page whose underlying file page has been freed. */
- inline void release_freed_page(buf_page_t *bpage);
+ ATTRIBUTE_COLD void release_freed_page(buf_page_t *bpage);
private:
/** Temporary memory for page_compressed and encrypted I/O */
@@ -2080,34 +1897,12 @@ private:
/** array of slots */
buf_tmp_buffer_t *slots;
- void create(ulint n_slots)
- {
- this->n_slots= n_slots;
- slots= static_cast<buf_tmp_buffer_t*>
- (ut_malloc_nokey(n_slots * sizeof *slots));
- memset((void*) slots, 0, n_slots * sizeof *slots);
- }
+ void create(ulint n_slots);
- void close()
- {
- for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
- {
- aligned_free(s->crypt_buf);
- aligned_free(s->comp_buf);
- }
- ut_free(slots);
- slots= nullptr;
- n_slots= 0;
- }
+ void close();
/** Reserve a buffer */
- buf_tmp_buffer_t *reserve()
- {
- for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
- if (s->acquire())
- return s;
- return nullptr;
- }
+ buf_tmp_buffer_t *reserve();
} io_buf;
/** whether resize() is in the critical path */
@@ -2117,64 +1912,46 @@ private:
/** The InnoDB buffer pool */
extern buf_pool_t buf_pool;
-inline void page_hash_latch::read_lock()
+inline buf_page_t *buf_pool_t::page_hash_table::get(const page_id_t id,
+ const hash_chain &chain)
+ const
+{
+#ifdef SAFE_MUTEX
+ DBUG_ASSERT(mysql_mutex_is_owner(&buf_pool.mutex) ||
+ lock_get(const_cast<hash_chain&>(chain)).is_locked());
+#endif /* SAFE_MUTEX */
+ for (buf_page_t *bpage= chain.first; bpage; bpage= bpage->hash)
+ {
+ ut_ad(bpage->in_page_hash);
+ ut_ad(bpage->in_file());
+ if (bpage->id() == id)
+ return bpage;
+ }
+ return nullptr;
+}
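+
+/* A typical shared-mode lookup, sketched (the unlock call is assumed to
+mirror lock_shared(); an illustration, not code from this header):
+
+  auto &chain= buf_pool.page_hash.cell_get(id.fold());
+  auto &latch= buf_pool.page_hash.lock_get(chain);
+  latch.lock_shared();
+  buf_page_t *bpage= buf_pool.page_hash.get(id, chain);
+  latch.unlock_shared();
+*/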
+
+#ifdef SUX_LOCK_GENERIC
+inline void page_hash_latch::lock_shared()
{
mysql_mutex_assert_not_owner(&buf_pool.mutex);
if (!read_trylock())
read_lock_wait();
}
-inline void page_hash_latch::write_lock()
+inline void page_hash_latch::lock()
{
if (!write_trylock())
write_lock_wait();
}
+#endif /* SUX_LOCK_GENERIC */
-inline void buf_page_t::add_buf_fix_count(uint32_t count)
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
- buf_fix_count_+= count;
-}
-
-inline void buf_page_t::set_buf_fix_count(uint32_t count)
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
- buf_fix_count_= count;
-}
-
-inline void buf_page_t::set_state(buf_page_state state)
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
-#ifdef UNIV_DEBUG
- switch (state) {
- case BUF_BLOCK_REMOVE_HASH:
- /* buf_pool_t::corrupted_evict() invokes set_corrupt_id()
- before buf_LRU_free_one_page(), so we cannot assert that
- we are holding the hash_lock. */
- break;
- case BUF_BLOCK_MEMORY:
- if (!in_file()) break;
- /* fall through */
- case BUF_BLOCK_FILE_PAGE:
- ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
- break;
- case BUF_BLOCK_NOT_USED:
- if (!in_file()) break;
- /* fall through */
- case BUF_BLOCK_ZIP_PAGE:
- ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked() ||
- (this >= &buf_pool.watch[0] &&
- this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]));
- break;
- }
-#endif
- state_= state;
-}
-
-inline void buf_page_t::set_io_fix(buf_io_fix io_fix)
+inline void buf_page_t::set_state(uint32_t s)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
- io_fix_= io_fix;
+ ut_ad(s <= REMOVE_HASH || s >= UNFIXED);
+ ut_ad(s < WRITE_FIX);
+ ut_ad(s <= READ_FIX || zip.fix == READ_FIX);
+ zip.fix= s;
}
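+
+/* The assertions above outline the encoding of the state word held in
+zip.fix: values up to REMOVE_HASH describe blocks that are not file
+pages, values from UNFIXED upwards describe file pages with the
+buffer-fix count in the bits outside LRU_MASK, and READ_FIX, WRITE_FIX
+mark I/O-fixed pages. set_state() may never produce a write-fixed
+state, and may go past READ_FIX only when the current state is exactly
+READ_FIX. */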
inline void buf_page_t::set_corrupt_id()
@@ -2191,19 +1968,15 @@ inline void buf_page_t::set_corrupt_id()
default:
ut_ad("block is dirty" == 0);
}
- switch (state()) {
- case BUF_BLOCK_REMOVE_HASH:
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_FILE_PAGE:
- ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
- break;
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_MEMORY:
- ut_ad("invalid state" == 0);
+ const auto f= state();
+ if (f != REMOVE_HASH)
+ {
+ ut_ad(f >= UNFIXED);
+ ut_ad(buf_pool.page_hash.lock_get(buf_pool.page_hash.cell_get(id_.fold())).
+ is_write_locked());
}
#endif
- id_= page_id_t(~0ULL);
+ id_.set_corrupted();
}
/** Set oldest_modification when adding to buf_pool.flush_list */
@@ -2218,10 +1991,12 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn)
/** Clear oldest_modification after removing from buf_pool.flush_list */
inline void buf_page_t::clear_oldest_modification()
{
- mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
- ut_d(const auto state= state_);
- ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE ||
- state == BUF_BLOCK_REMOVE_HASH);
+#ifdef SAFE_MUTEX
+ if (oldest_modification() != 2)
+ mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
+#endif /* SAFE_MUTEX */
+ ut_d(const auto s= state());
+ ut_ad(s >= REMOVE_HASH);
ut_ad(oldest_modification());
ut_ad(!list.prev);
ut_ad(!list.next);
@@ -2231,46 +2006,16 @@ inline void buf_page_t::clear_oldest_modification()
oldest_modification_.store(0, std::memory_order_release);
}
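+
+/* oldest_modification_ uses small sentinel values: 0 means the page is
+clean and not in buf_pool.flush_list; 1 means it is no longer dirty but
+its removal from flush_list is deferred; 2 marks a dirty page of the
+temporary tablespace, which never enters flush_list; any larger value
+is the LSN of the oldest unwritten modification. */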
-/** Note that a block is no longer dirty, while not removing
-it from buf_pool.flush_list */
-inline void buf_page_t::clear_oldest_modification(bool temporary)
-{
- ut_ad(temporary == fsp_is_system_temporary(id().space()));
- if (temporary)
- {
- ut_ad(oldest_modification() == 2);
- oldest_modification_= 0;
- }
- else
- {
- /* We use release memory order to guarantee that callers of
- oldest_modification_acquire() will observe the block as
- being detached from buf_pool.flush_list, after reading the value 0. */
- ut_ad(oldest_modification() > 2);
- oldest_modification_.store(1, std::memory_order_release);
- }
-}
-
-/** @return whether the block is modified and ready for flushing */
-inline bool buf_page_t::ready_for_flush() const
-{
- mysql_mutex_assert_owner(&buf_pool.mutex);
- ut_ad(in_LRU_list);
- ut_a(in_file());
- ut_ad(fsp_is_system_temporary(id().space())
- ? oldest_modification() == 2
- : oldest_modification() > 2);
- return io_fix_ == BUF_IO_NONE;
-}
-
/** @return whether the block can be relocated in memory.
The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool buf_page_t::can_relocate() const
{
mysql_mutex_assert_owner(&buf_pool.mutex);
- ut_ad(in_file());
+ const auto f= state();
+ ut_ad(f >= FREED);
ut_ad(in_LRU_list);
- return io_fix_ == BUF_IO_NONE && !buf_fix_count_;
+ return (f == FREED || (f < READ_FIX && !(f & ~LRU_MASK))) &&
+ !lock.is_locked_or_waiting();
}
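+
+/* Reading the condition: relocation is allowed if the page is exactly
+FREED, or if its state is below READ_FIX (not I/O-fixed) with no bits
+set outside LRU_MASK (a buffer-fix count of zero); in either case
+nobody may hold or wait for the page latch. */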
/** @return whether the block has been flagged old in buf_pool.LRU */
@@ -2331,41 +2076,26 @@ inline void buf_page_t::set_old(bool old)
/**********************************************************************
Let us list the consistency conditions for different control block states.
-NOT_USED: is in free list, not in LRU list, not in flush list, nor
- page hash table
-MEMORY: is not in free list, LRU list, or flush list, nor page
- hash table
-FILE_PAGE: space and offset are defined, is in page hash table
- if io_fix == BUF_IO_WRITE,
- buf_pool.n_flush_LRU() || buf_pool.n_flush_list()
-
- (1) if buf_fix_count == 0, then
- is in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- is x-locked,
- if and only if io_fix == BUF_IO_READ
- is s-locked,
- if and only if io_fix == BUF_IO_WRITE
-
- (2) if buf_fix_count > 0, then
- is not in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- if io_fix == BUF_IO_READ,
- is x-locked
- if io_fix == BUF_IO_WRITE,
- is s-locked
+NOT_USED: is in free list, not LRU, not flush_list, nor page_hash
+MEMORY: is not in any of free, LRU, flush_list, page_hash
+in_file(): is not in free list, is in LRU list, id() is defined,
+ is in page_hash (not necessarily if is_read_fixed())
+
+ is in buf_pool.flush_list, if and only
+ if oldest_modification == 1 || oldest_modification > 2
+
+ (1) if is_write_fixed(): is u-locked
+ (2) if is_read_fixed(): is x-locked
State transitions:
NOT_USED => MEMORY
-MEMORY => FILE_PAGE
MEMORY => NOT_USED
-FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
- (1) buf_fix_count == 0,
- (2) oldest_modification == 0, and
- (3) io_fix == 0.
+MEMORY => UNFIXED
+UNFIXED => in_file()
+in_file() => UNFIXED or FREED
+UNFIXED or FREED => REMOVE_HASH
+REMOVE_HASH => NOT_USED (if and only if !oldest_modification())
*/
/** Select from where to start a scan. If we have scanned
@@ -2427,5 +2157,3 @@ struct CheckUnzipLRUAndLRUList {
#include "buf0buf.inl"
#endif /* !UNIV_INNOCHECKSUM */
-
-#endif