diff options
author | Vlad Lesin <vlad_lesin@mail.ru> | 2020-12-10 21:11:26 +0300 |
---|---|---|
committer | Vlad Lesin <vlad_lesin@mail.ru> | 2020-12-30 15:55:25 +0300 |
commit | d92be859d61c8b9d295c06ed84c65159be5675dc (patch) | |
tree | 391fa28e89a15cb82e2cfc84a2923ed42b5e89d6 | |
parent | 6859e80df7b7307c1c1f140867d6b54ec472e9ac (diff) | |
download | mariadb-git-10.5-MDEV-18976-redolog-crc.tar.gz |
MDEV-18976: Implement a CHECKSUM redo log record for improved validation (branch: 10.5-MDEV-18976-redolog-crc)
This is a draft implementation without tests.
-rw-r--r-- | mysql-test/suite/innodb/t/MDEV-18976-redolog-crc.test | 21 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 4 | ||||
-rw-r--r-- | storage/innobase/fsp/fsp0fsp.cc | 6 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 6 | ||||
-rw-r--r-- | storage/innobase/include/buf0types.h | 2 | ||||
-rw-r--r-- | storage/innobase/include/mtr0log.h | 26 | ||||
-rw-r--r-- | storage/innobase/include/mtr0mtr.h | 43 | ||||
-rw-r--r-- | storage/innobase/include/mtr0types.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 1 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 33 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0mtr.cc | 53 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 2 |
12 files changed, 183 insertions, 17 deletions
diff --git a/mysql-test/suite/innodb/t/MDEV-18976-redolog-crc.test b/mysql-test/suite/innodb/t/MDEV-18976-redolog-crc.test new file mode 100644 index 00000000000..76503ef56a7 --- /dev/null +++ b/mysql-test/suite/innodb/t/MDEV-18976-redolog-crc.test @@ -0,0 +1,21 @@ +--source include/have_innodb.inc +--source include/have_debug.inc + +# Disable pages flushing to allow redo log records to be executed on --prepare. +#SET @old_debug_dbug=@@global.debug_dbug; +SET GLOBAL debug_dbug="+d,ib_log_checkpoint_avoid"; +#SET @old_innodb_page_cleaner_disabled_debug=@@global.innodb_page_cleaner_disabled_debug; +SET GLOBAL innodb_page_cleaner_disabled_debug=ON; +SET GLOBAL innodb_redo_log_checksum=ON; + +CREATE TABLE t(i INT) ENGINE INNODB; +INSERT INTO t VALUES (1), (2), (3), (4), (5); + +--source include/kill_mysqld.inc + + +--source include/start_mysqld.inc + +DROP TABLE t; +#SET GLOBAL innodb_page_cleaner_disabled_debug=@old_innodb_page_cleaner_disabled_debug; +#SET GLOBAL debug_dbug=@old_debug_dbug; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 2ddd9f278b1..05dbe068bbf 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2504,10 +2504,6 @@ void buf_page_free(const page_id_t page_id, buf_block_t *block= reinterpret_cast<buf_block_t*> (buf_pool.page_hash_get_low(page_id, fold)); - /* TODO: try to all this part of mtr_t::free() */ - if (srv_immediate_scrub_data_uncompressed || mtr->is_page_compressed()) - mtr->add_freed_offset(page_id); - if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE) { /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE, diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index ae8c557b24c..167a87077a0 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -2637,9 +2637,9 @@ fseg_free_extent( for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { if (!xdes_is_free(descr, i)) { - buf_page_free( - page_id_t(space->id, 
first_page_in_extent + 1), - mtr, __FILE__, __LINE__); + page_id_t freed_page_id(space->id, first_page_in_extent + 1); + buf_page_free(freed_page_id, mtr, __FILE__, __LINE__); + mtr->add_freed_offset(freed_page_id); } } } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9a47fb6b30f..9cad32e1917 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -19914,6 +19914,11 @@ static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed, "Enable scrubbing of data", NULL, NULL, FALSE); +static MYSQL_SYSVAR_BOOL( + redo_log_checksum, srv_redo_log_checksum, 0, + "Write redo log record with page crc for each modified page on mtr commit", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed, deprecated::innodb_background_scrub_data_uncompressed, PLUGIN_VAR_OPCMDARG, innodb_deprecated_ignored, NULL, @@ -20140,6 +20145,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(background_thread), MYSQL_SYSVAR(encrypt_temporary_tables), + MYSQL_SYSVAR(redo_log_checksum), NULL }; diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index ba1e2e5eaa6..e2026052122 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -171,7 +171,7 @@ public: m_id= (m_id & ~uint64_t{0} << 32) | page_no; } - ulonglong raw() { return m_id; } + ulonglong raw() const { return m_id; } private: /** The page identifier */ uint64_t m_id; diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 2bcd69d8899..8d7ff84b1aa 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -384,8 +384,8 @@ template<byte type> inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage, size_t len, bool alloc, size_t offset) { - static_assert(!(type & 15) && type != RESERVED && type != OPTION && - type <= FILE_CHECKPOINT, 
"invalid type"); + static_assert(!(type & 15) && type != RESERVED && type <= FILE_CHECKPOINT, + "invalid type"); ut_ad(type >= FILE_CREATE || is_named_space(id.space())); ut_ad(!bpage || bpage->id() == id); constexpr bool have_len= type != INIT_PAGE && type != FREE_PAGE; @@ -541,9 +541,13 @@ inline void mtr_t::init(buf_block_t *b) inline void mtr_t::free(fil_space_t &space, uint32_t offset) { page_id_t freed_page_id(space.id, offset); + + if (srv_redo_log_checksum || srv_immediate_scrub_data_uncompressed + || is_page_compressed()) + add_freed_offset(freed_page_id); + if (m_log_mode == MTR_LOG_ALL) m_log.close(log_write<FREE_PAGE>(freed_page_id, nullptr)); - ut_ad(!m_user_space || m_user_space == &space); if (&space == fil_system.sys_space) freed_system_tablespace_page(); @@ -673,3 +677,19 @@ inline void mtr_t::trim_pages(const page_id_t id) m_log.close(l); set_trim_pages(); } + +inline void mtr_t::page_checksum(const page_id_t id, uint32_t crc, + lsn_t flushed_lsn) +{ + if (m_log_mode != MTR_LOG_ALL) + return; + static_assert(sizeof(crc) == 4, "compatibility"); + static_assert(sizeof(flushed_lsn) == 8, "compatibility"); + byte* l = log_write<OPTION>(id, nullptr, 4 + 8 + 1, true); + *l++ = CHECKSUM; + mach_write_to_4(l, crc); + l += 4; + mach_write_to_8(l, flushed_lsn); + l += 8; + m_log.close(l); +} diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index f8ab7cf440f..7997b170434 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -344,7 +344,7 @@ public: /** Check if we are holding exclusive tablespace latch @param space tablespace to search for @return whether space.latch is being held */ - bool memo_contains(const fil_space_t& space) + bool memo_contains(const fil_space_t& space) const MY_ATTRIBUTE((warn_unused_result)); @@ -378,7 +378,7 @@ public: mtr_buf_t* get_memo() { return &m_memo; } /** @return true if system tablespace page has been freed */ - bool is_freed_system_tablespace_page() 
+ bool is_freed_system_tablespace_page() const { return m_freed_in_system_tablespace; } @@ -577,6 +577,9 @@ public: @param id first page identifier that will not be in the file */ inline void trim_pages(const page_id_t id); + inline void page_checksum(const page_id_t id, uint32_t crc, + lsn_t flushed_lsn); + /** Write a log record about a file operation. @param type file operation @param space_id tablespace identifier @@ -645,6 +648,42 @@ public: { ut_ad(!m_commit || m_start); return m_start && !m_commit; } /** @return whether the mini-transaction has been committed */ bool has_committed() const { ut_ad(!m_commit || m_start); return m_commit; } + bool page_is_freed(page_id_t id) const + { + if (!m_freed_pages) + return false; + fil_space_t *freed_space= m_user_space; + /* Get the freed tablespace in case of predefined tablespace */ + if (!freed_space) + { + ut_ad(is_freed_system_tablespace_page()); + freed_space= fil_system.sys_space; + } + + ut_ad(memo_contains(*freed_space)); + + if (id.space() != freed_space->id) + return false; + + return m_freed_pages->contains(id.page_no()); + } + static uint32_t page_crc(const byte* page) + { + /* Since the field FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, and in + versions <= 4.1.x FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written + outside the buffer pool to the first pages of data files, we have to + skip them in the page checksum calculation. We must also skip the + field FIL_PAGE_SPACE_OR_CHKSUM where the checksum is stored, and also + the last 8 bytes of page because there we store the old formula + checksum. 
*/ + return static_cast<uint32_t>( + ut_fold_binary(page + FIL_PAGE_OFFSET, FIL_PAGE_LSN - FIL_PAGE_OFFSET) + + ut_fold_binary(page + FIL_PAGE_TYPE, + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION - FIL_PAGE_PREV) + + ut_fold_binary(page + FIL_PAGE_DATA, srv_page_size - FIL_PAGE_DATA + - FIL_PAGE_END_LSN_OLD_CHKSUM)); + } + private: /** whether start() has been called */ bool m_start= false; diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index d1b6784ae86..d7c5ccc6baa 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -289,6 +289,9 @@ enum mrec_ext_t TRIM_PAGES= 10 }; +enum mrec_opt_t { + CHECKSUM +}; /** Redo log record types for file-level operations. These bit patterns will be written to redo log files, so the existing codes or diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 7b65000c115..c6cb789dce9 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -410,6 +410,7 @@ extern ulong srv_max_purge_lag_delay; extern my_bool innodb_encrypt_temporary_tables; extern my_bool srv_immediate_scrub_data_uncompressed; +extern my_bool srv_redo_log_checksum; /*-------------------------------------------*/ /** Modes of operation */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index fe691a4b256..117e791ddff 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -266,7 +266,6 @@ public: next_not_same_page: last_offset= 1; /* the next record must not be same_page */ } - next: l+= rlen; continue; } @@ -280,7 +279,32 @@ public: switch (b & 0x70) { case OPTION: - goto next; + { + if (UNIV_UNLIKELY(rlen != 1 + 4 + 8 || *l != CHECKSUM)) + goto record_corrupted; + ++l; + uint32_t crc = mach_read_from_4(l); + l += 4; +// lsn_t flushed_lsn = mach_read_from_8(l); + l += 8; + uint32_t calc_crc = mtr_t::page_crc(frame); +// lsn_t flushed_lsn_from_page = 
mach_read_from_8(frame + FIL_PAGE_LSN); + if (calc_crc != crc) { + ib::warn() << "Page checksum stored in redo log record " << crc + << " does not match counted checksum " << calc_crc + << " for page " << block.page.id(); + } + /* + if (flushed_lsn_from_page != flushed_lsn) { + ib::warn() << "Page LSN stored in redo log record " << flushed_lsn + << " does not match " << flushed_lsn_from_page + << " stored on page " << block.page.id(); + failed = true; + } + */ + applied = APPLIED_YES; + continue; + } case EXTENDED: if (UNIV_UNLIKELY(block.page.id().page_no() < 3 || block.page.zip.ssize)) @@ -1970,8 +1994,11 @@ same_page: } last_offset= FIL_PAGE_TYPE; break; - case RESERVED: case OPTION: + if (UNIV_UNLIKELY(rlen != 1 + 4 + 8 || *l != CHECKSUM)) + goto record_corrupted; + break; + case RESERVED: continue; case WRITE: case MEMMOVE: diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 60c91364b15..cfe0aed162a 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -32,6 +32,7 @@ Created 11/26/1995 Heikki Tuuri #include "page0types.h" #include "mtr0log.h" #include "log0recv.h" +#include <unordered_set> /** Iterate over a memo block in reverse. */ template <typename Functor> @@ -300,6 +301,50 @@ struct ReleaseAll { } }; + +class WriteOptionCRC { +public: + WriteOptionCRC(mtr_t &mtr) : m_mtr(mtr) {} + /** @return true always. 
*/ + bool operator()(mtr_memo_slot_t *slot) + { + if (slot->type & MTR_MEMO_MODIFY) + { +#ifdef UNIV_DEBUG + switch (slot->type & ~MTR_MEMO_MODIFY) { + case MTR_MEMO_BUF_FIX: + case MTR_MEMO_PAGE_S_FIX: + case MTR_MEMO_PAGE_SX_FIX: + case MTR_MEMO_PAGE_X_FIX: + break; + default: + ut_ad("invalid type" == 0); + break; + } +#endif /* UNIV_DEBUG */ + buf_block_t *block= reinterpret_cast<buf_block_t*>(slot->object); + byte *page = block->frame; + ulonglong page_id_raw = block->page.id().raw(); + if (!m_visited_pages.count(page_id_raw)) { + static_assert(FIL_PAGE_SPACE_OR_CHKSUM == FIL_PAGE_OFFSET - 4, + "compatibility"); + static_assert(FIL_PAGE_TYPE == FIL_PAGE_LSN + 8, "compatibility"); + uint32_t crc = mtr_t::page_crc(page); + lsn_t lsn = mach_read_from_8(page + FIL_PAGE_LSN); + if (!m_mtr.page_is_freed(block->page.id())) + m_mtr.page_checksum(block->page.id(), crc, lsn); + m_visited_pages.insert(page_id_raw); + } + } + return true; + } + +private: + mtr_t &m_mtr; + std::unordered_set<ulonglong> m_visited_pages; +}; + + #ifdef UNIV_DEBUG /** Check that all slots have been handled. 
*/ struct DebugCheck { @@ -400,6 +445,12 @@ void mtr_t::commit() { ut_ad(!srv_read_only_mode || m_log_mode == MTR_LOG_NO_REDO); + if (srv_redo_log_checksum) + { + Iterate<WriteOptionCRC> iteration(WriteOptionCRC(*this)); + m_memo.for_each_block(iteration); + } + std::pair<lsn_t,bool> lsns; if (const ulint len= prepare_write()) @@ -969,7 +1020,7 @@ bool mtr_t::memo_contains(const rw_lock_t &lock, mtr_memo_type_t type) /** Check if we are holding exclusive tablespace latch @param space tablespace to search for @return whether space.latch is being held */ -bool mtr_t::memo_contains(const fil_space_t& space) +bool mtr_t::memo_contains(const fil_space_t& space) const { Iterate<Find> iteration(Find(&space, MTR_MEMO_SPACE_X_LOCK)); if (m_memo.for_each_block_in_reverse(iteration)) diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 71ae6e04881..f8fdbbb1443 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -392,6 +392,8 @@ my_bool innodb_encrypt_temporary_tables; my_bool srv_immediate_scrub_data_uncompressed; +my_bool srv_redo_log_checksum; + /* Array of English strings describing the current state of an i/o handler thread */ |