summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Marko Mäkelä <marko.makela@mariadb.com> 2021-09-24 08:22:19 +0300
committer: Marko Mäkelä <marko.makela@mariadb.com> 2021-09-24 08:22:19 +0300
commit: f5fddae3cbcff2d2531f0ce61bd144212379aa42 (patch)
tree: 35a029dffb510fe90dd6ba7a0120c6b7f67aa6de
parent: 15efb7ed48265b8d40897a13c0b8e09c6bdd34c9 (diff)
download: mariadb-git-f5fddae3cbcff2d2531f0ce61bd144212379aa42.tar.gz
MDEV-26450: Corruption due to innodb_undo_log_truncate

At least since commit 055a3334adc004bd3a897990c2f93178e6bb5f90 (MDEV-13564) the undo log truncation in InnoDB did not work correctly. The main issue is that during the execution of trx_purge_truncate_history() some pages of the newly truncated undo tablespace could be discarded.

This is improved from commit 1cb218c37cc3fe01a1ff2fe9b1cbfb591e90d5ce which was applied to earlier-version branches.

fsp_try_extend_data_file(): Apply the peculiar rounding of fil_space_t::size_in_header only to the system tablespace, whose size can be expressed in megabytes in a configuration parameter. Other files may freely grow by a number of pages.

fseg_alloc_free_page_low(): Do allow the extension of undo tablespaces, and mention the file name in the error message.

mtr_t::commit_shrink(): Implement crash-safe shrinking of a tablespace:
(1) durably write the log
(2) release the page latches of the rebuilt tablespace
(3) release the mutexes
(4) truncate the file
(5) release the tablespace latch
This is refactored from trx_purge_truncate_history().

log_write_and_flush_prepare(), log_write_and_flush(): New functions to durably write log during mtr_t::commit_shrink().

-rw-r--r-- mysql-test/suite/innodb/r/undo_truncate.result 22
-rw-r--r-- mysql-test/suite/innodb/t/undo_truncate.test 64
-rw-r--r-- storage/innobase/fsp/fsp0fsp.cc 18
-rw-r--r-- storage/innobase/include/log0log.h 16
-rw-r--r-- storage/innobase/include/mtr0mtr.h 4
-rw-r--r-- storage/innobase/include/mtr0mtr.ic 4
-rw-r--r-- storage/innobase/log/log0log.cc 32
-rw-r--r-- storage/innobase/mtr/mtr0mtr.cc 111
-rw-r--r-- storage/innobase/trx/trx0purge.cc 21
9 files changed, 173 insertions, 119 deletions
diff --git a/mysql-test/suite/innodb/r/undo_truncate.result b/mysql-test/suite/innodb/r/undo_truncate.result
index ad236bdecd4..54eeee9a9df 100644
--- a/mysql-test/suite/innodb/r/undo_truncate.result
+++ b/mysql-test/suite/innodb/r/undo_truncate.result
@@ -7,28 +7,12 @@ SET @trunc_start=
WHERE variable_name = 'innodb_undo_truncations');
create table t1(keyc int primary key, c char(100)) engine = innodb;
create table t2(keyc int primary key, c char(100)) engine = innodb;
-CREATE PROCEDURE populate_t1()
-BEGIN
-DECLARE i INT DEFAULT 1;
-while (i <= 20000) DO
-insert into t1 values (i, 'a');
-SET i = i + 1;
-END WHILE;
-END |
-CREATE PROCEDURE populate_t2()
-BEGIN
-DECLARE i INT DEFAULT 1;
-while (i <= 20000) DO
-insert into t2 values (i, 'a');
-SET i = i + 1;
-END WHILE;
-END |
connect con1,localhost,root,,;
begin;
-call populate_t1();
+insert into t1 select seq,'a' from seq_1_to_20000;
connect con2,localhost,root,,;
begin;
-call populate_t2();
+insert into t2 select seq,'a' from seq_1_to_20000;
connection con1;
update t1 set c = 'mysql';
connection con2;
@@ -50,8 +34,6 @@ commit;
disconnect con2;
connection default;
drop table t1, t2;
-drop PROCEDURE populate_t1;
-drop PROCEDURE populate_t2;
InnoDB 0 transactions not purged
SET GLOBAL innodb_purge_rseg_truncate_frequency = @save_frequency;
SET GLOBAL innodb_undo_log_truncate = @save_truncate;
diff --git a/mysql-test/suite/innodb/t/undo_truncate.test b/mysql-test/suite/innodb/t/undo_truncate.test
index d2a4e287305..9abca6179c4 100644
--- a/mysql-test/suite/innodb/t/undo_truncate.test
+++ b/mysql-test/suite/innodb/t/undo_truncate.test
@@ -1,6 +1,7 @@
--source include/have_innodb.inc
--source include/innodb_page_size.inc
--source include/have_undo_tablespaces.inc
+--source include/have_sequence.inc
SET @save_frequency = @@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET @save_truncate = @@GLOBAL.innodb_undo_log_truncate;
@@ -19,37 +20,14 @@ WHERE variable_name = 'innodb_undo_truncations');
create table t1(keyc int primary key, c char(100)) engine = innodb;
create table t2(keyc int primary key, c char(100)) engine = innodb;
#
-delimiter |;
-CREATE PROCEDURE populate_t1()
-BEGIN
- DECLARE i INT DEFAULT 1;
- while (i <= 20000) DO
- insert into t1 values (i, 'a');
- SET i = i + 1;
- END WHILE;
-END |
-delimiter ;|
-#
-delimiter |;
-CREATE PROCEDURE populate_t2()
-BEGIN
- DECLARE i INT DEFAULT 1;
- while (i <= 20000) DO
- insert into t2 values (i, 'a');
- SET i = i + 1;
- END WHILE;
-END |
-delimiter ;|
-#
-#
let DATADIR = `select @@datadir`;
connect (con1,localhost,root,,);
begin;
-send call populate_t1();
+send insert into t1 select seq,'a' from seq_1_to_20000;
connect (con2,localhost,root,,);
begin;
-send call populate_t2();
+send insert into t2 select seq,'a' from seq_1_to_20000;
connection con1; reap; send update t1 set c = 'mysql';
connection con2; reap; send update t2 set c = 'mysql';
@@ -59,25 +37,12 @@ connection con1; reap; send delete from t1;
connection con2; reap; delete from t2;
connection con1; reap;
-let CHECKFILE = $MYSQL_TMP_DIR/check.txt;
-perl;
-($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size1)
- = stat("$ENV{DATADIR}/undo001");
-($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size2)
- = stat("$ENV{DATADIR}/undo002");
-open(OUT, ">$ENV{CHECKFILE}") || die;
-print OUT "let \$size1='$size1,$size2';\n";
-close(OUT);
-EOF
-
SET GLOBAL innodb_undo_log_truncate = 1;
commit; disconnect con1;
connection con2; commit; disconnect con2;
connection default;
drop table t1, t2;
-drop PROCEDURE populate_t1;
-drop PROCEDURE populate_t2;
--source include/wait_all_purged.inc
@@ -93,28 +58,5 @@ if (`select @@innodb_page_size IN (8192,16384)`)
source include/wait_condition.inc;
}
---source $CHECKFILE
-perl;
-($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size1)
- = stat("$ENV{DATADIR}/undo001");
-($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size2)
- = stat("$ENV{DATADIR}/undo002");
-open(OUT, ">$ENV{CHECKFILE}") || die;
-print OUT "let \$size2='$size1,$size2';\n";
-close(OUT);
-EOF
-
---source $CHECKFILE
---remove_file $CHECKFILE
-
-if ($size1 == $size2)
-{
- # This fails for innodb_page_size=64k, occasionally also for 32k.
- if (`select @@innodb_page_size IN (8192,16384)`)
- {
- echo Truncation did not happen: $size1;
- }
-}
-
SET GLOBAL innodb_purge_rseg_truncate_frequency = @save_frequency;
SET GLOBAL innodb_undo_log_truncate = @save_truncate;
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 3d5a7edd947..ae2ea90c3e5 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -586,7 +586,7 @@ void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr)
in order to avoid optimizing away any unchanged most
significant bytes of FSP_SIZE. */
mtr->write<4,mtr_t::FORCED>(*block, FSP_HEADER_OFFSET + FSP_SIZE
- + block->frame, size);
+ + block->frame, size);
ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT
+ block->frame));
if (auto f = space->flags & ~FSP_FLAGS_MEM_MASK) {
@@ -780,10 +780,12 @@ fsp_try_extend_data_file(fil_space_t *space, buf_block_t *header, mtr_t *mtr)
return(0);
}
- /* We ignore any fragments of a full megabyte when storing the size
- to the space header */
+ /* For the system tablespace, we ignore any fragments of a
+ full megabyte when storing the size to the space header */
- space->size_in_header = ut_2pow_round(space->size, (1024 * 1024) / ps);
+ space->size_in_header = space->id
+ ? space->size
+ : ut_2pow_round(space->size, (1024 * 1024) / ps);
/* recv_sys_t::parse() expects to find a WRITE record that
covers all 4 bytes. Therefore, we must specify mtr_t::FORCED
@@ -2130,14 +2132,14 @@ take_hinted_page:
return(NULL);
}
- if (space->size <= ret_page && !is_system_tablespace(space_id)) {
+ if (space->size <= ret_page && !is_predefined_tablespace(space_id)) {
/* It must be that we are extending a single-table
tablespace whose size is still < 64 pages */
if (ret_page >= FSP_EXTENT_SIZE) {
- ib::error() << "Error (2): trying to extend"
- " a single-table tablespace " << space_id
- << " by single page(s) though the"
+ ib::error() << "Trying to extend '"
+ << space->chain.start->name
+ << "' by single page(s) though the"
<< " space size " << space->size
<< ". Page no " << ret_page << ".";
ut_ad(!has_done_reservation);
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 460acaf5b66..1dcff513d7c 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -107,12 +107,16 @@ be flushed to the file system
@param[in] rotate_key whether to rotate the encryption key */
void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key = false);
-/** write to the log file up to the last log entry.
-@param[in] sync whether we want the written log
-also to be flushed to disk. */
-void
-log_buffer_flush_to_disk(
- bool sync = true);
+/** Write to the log file up to the last log entry.
+@param sync whether to wait for a durable write to complete */
+void log_buffer_flush_to_disk(bool sync= true);
+
+
+/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */
+ATTRIBUTE_COLD void log_write_and_flush_prepare();
+
+/** Durably write the log up to log_sys.lsn() and release log_sys.mutex. */
+ATTRIBUTE_COLD void log_write_and_flush();
/** Make a checkpoint */
ATTRIBUTE_COLD void log_make_checkpoint();
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index f3db0008c3e..e253038e1ee 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -98,6 +98,10 @@ struct mtr_t {
/** Commit the mini-transaction. */
void commit();
+ /** Commit a mini-transaction that is shrinking a tablespace.
+ @param space tablespace that is being shrunk */
+ ATTRIBUTE_COLD void commit_shrink(fil_space_t &space);
+
/** Commit a mini-transaction that did not modify any pages,
but generated some redo log on a higher level, such as
FILE_MODIFY records and an optional FILE_CHECKPOINT marker.
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index 4a483379e21..48bdb9bb6d1 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -48,8 +48,8 @@ mtr_t::memo_push(void* object, mtr_memo_type_t type)
/* If this mtr has x-fixed a clean page then we set
the made_dirty flag. This tells us if we need to
- grab log_flush_order_mutex at mtr_commit so that we
- can insert the dirtied page to the flush list. */
+ grab log_sys.flush_order_mutex at mtr_t::commit() so that we
+ can insert the dirtied page into the flush list. */
if ((type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX)
&& !m_made_dirty) {
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index a6fa50dd753..257645cb6a4 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -835,15 +835,41 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
log_flush_notify(flush_lsn);
}
-/** write to the log file up to the last log entry.
-@param[in] sync whether we want the written log
-also to be flushed to disk. */
+/** Write to the log file up to the last log entry.
+@param sync whether to wait for a durable write to complete */
void log_buffer_flush_to_disk(bool sync)
{
ut_ad(!srv_read_only_mode);
log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), sync);
}
+/** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */
+ATTRIBUTE_COLD void log_write_and_flush_prepare()
+{
+ mysql_mutex_assert_not_owner(&log_sys.mutex);
+
+ while (flush_lock.acquire(log_sys.get_lsn() + 1) !=
+ group_commit_lock::ACQUIRED);
+ while (write_lock.acquire(log_sys.get_lsn() + 1) !=
+ group_commit_lock::ACQUIRED);
+}
+
+/** Durably write the log and release log_sys.mutex */
+ATTRIBUTE_COLD void log_write_and_flush()
+{
+ ut_ad(!srv_read_only_mode);
+ auto lsn= log_sys.get_lsn();
+ write_lock.set_pending(lsn);
+ log_write(false);
+ ut_a(log_sys.write_lsn == lsn);
+ write_lock.release(lsn);
+
+ lsn= write_lock.value();
+ flush_lock.set_pending(lsn);
+ log_write_flush_to_disk_low(lsn);
+ flush_lock.release(lsn);
+}
+
/********************************************************************
Tries to establish a big enough margin of free space in the log buffer, such
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 691b393561f..cf1574a56c4 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -459,6 +459,115 @@ void mtr_t::commit()
release_resources();
}
+#ifdef UNIV_DEBUG
+/** Check that all pages belong to a shrunk tablespace. */
+struct Shrink
+{
+ const page_id_t low, high;
+ Shrink(const fil_space_t &space) :
+ low({space.id, 0}), high({space.id, space.size}) {}
+
+ bool operator()(const mtr_memo_slot_t *slot) const
+ {
+ if (!slot->object)
+ return true;
+ switch (slot->type) {
+ default:
+ ut_ad("invalid type" == 0);
+ return false;
+ case MTR_MEMO_SPACE_X_LOCK:
+ ut_ad(low.space() == static_cast<fil_space_t*>(slot->object)->id);
+ return true;
+ case MTR_MEMO_PAGE_X_MODIFY:
+ case MTR_MEMO_PAGE_SX_MODIFY:
+ case MTR_MEMO_PAGE_X_FIX:
+ case MTR_MEMO_PAGE_SX_FIX:
+ const auto &bpage= static_cast<buf_block_t*>(slot->object)->page;
+ const auto id= bpage.id();
+ if (id == page_id_t{0, TRX_SYS_PAGE_NO})
+ {
+ ut_ad(srv_is_undo_tablespace(low.space()));
+ break;
+ }
+ ut_ad(id >= low);
+ ut_ad(id < high);
+ ut_ad(bpage.state() == BUF_BLOCK_FILE_PAGE);
+ ut_ad(bpage.oldest_modification() <= 1);
+ break;
+ }
+ return true;
+ }
+};
+#endif
+
+/** Commit a mini-transaction that is shrinking a tablespace.
+@param space tablespace that is being shrunk */
+void mtr_t::commit_shrink(fil_space_t &space)
+{
+ ut_ad(is_active());
+ ut_ad(!is_inside_ibuf());
+ ut_ad(!high_level_read_only);
+ ut_ad(m_modifications);
+ ut_ad(m_made_dirty);
+ ut_ad(!recv_recovery_is_on());
+ ut_ad(m_log_mode == MTR_LOG_ALL);
+ ut_ad(UT_LIST_GET_LEN(space.chain) == 1);
+
+ log_write_and_flush_prepare();
+
+ const lsn_t start_lsn= finish_write(prepare_write()).first;
+
+ mysql_mutex_lock(&log_sys.flush_order_mutex);
+ /* Durably write the reduced FSP_SIZE before truncating the data file. */
+ log_write_and_flush();
+
+ if (m_freed_pages)
+ {
+ ut_ad(!m_freed_pages->empty());
+ ut_ad(m_freed_space == &space);
+ ut_ad(memo_contains(*m_freed_space));
+ ut_ad(is_named_space(m_freed_space));
+ m_freed_space->update_last_freed_lsn(m_commit_lsn);
+
+ if (!is_trim_pages())
+ for (const auto &range : *m_freed_pages)
+ m_freed_space->add_free_range(range);
+ else
+ m_freed_space->clear_freed_ranges();
+ delete m_freed_pages;
+ m_freed_pages= nullptr;
+ m_freed_space= nullptr;
+ /* mtr_t::start() will reset m_trim_pages */
+ }
+ else
+ ut_ad(!m_freed_space);
+
+ ut_d(m_memo.for_each_block_in_reverse(CIterate<Shrink>{space}));
+
+ m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
+ (ReleaseBlocks(start_lsn, m_commit_lsn,
+ m_memo)));
+ mysql_mutex_unlock(&log_sys.flush_order_mutex);
+
+ mutex_enter(&fil_system.mutex);
+ ut_ad(space.is_being_truncated);
+ ut_ad(space.is_stopping());
+ space.set_stopping(false);
+ space.is_being_truncated= false;
+ mutex_exit(&fil_system.mutex);
+
+ /* Truncate the file before releasing the space.latch. File extension
+ (and any allocation of pages beyond the current intended end of the file)
+ is covered by exclusive space.latch, which we are still holding here. */
+ os_file_truncate(space.chain.start->name, space.chain.start->handle,
+ os_offset_t{space.size} << srv_page_size_shift, true);
+
+ m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>());
+ srv_stats.log_write_requests.inc();
+
+ release_resources();
+}
+
/** Commit a mini-transaction that did not modify any pages,
but generated some redo log on a higher level, such as
FILE_MODIFY records and an optional FILE_CHECKPOINT marker.
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 4570d63b3d9..e417d1c5f9f 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -754,27 +754,11 @@ not_free:
rseg->needs_purge = false;
}
- mtr.commit();
- /* Write-ahead the redo log record. */
- log_write_up_to(mtr.commit_lsn(), true);
-
- /* Trim the file size. */
- os_file_truncate(file->name, file->handle,
- os_offset_t(size) << srv_page_size_shift,
- true);
+ mtr.commit_shrink(space);
- /* This is only executed by srv_purge_coordinator_thread. */
+ /* No mutex; this is only updated by the purge coordinator. */
export_vars.innodb_undo_truncations++;
- /* In MDEV-8319 (10.5) we will PUNCH_HOLE the garbage
- (with write-ahead logging). */
- mutex_enter(&fil_system.mutex);
- ut_ad(&space == purge_sys.truncate.current);
- ut_ad(space.is_being_truncated);
- purge_sys.truncate.current->set_stopping(false);
- purge_sys.truncate.current->is_being_truncated = false;
- mutex_exit(&fil_system.mutex);
-
if (purge_sys.rseg != NULL
&& purge_sys.rseg->last_page_no == FIL_NULL) {
/* If purge_sys.rseg is pointing to rseg that
@@ -806,6 +790,7 @@ not_free:
ib::info() << "Truncated " << file->name;
purge_sys.truncate.last = purge_sys.truncate.current;
+ ut_ad(&space == purge_sys.truncate.current);
purge_sys.truncate.current = NULL;
}
}