summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marko Mäkelä <marko.makela@mariadb.com> 2019-11-17 21:57:16 +0200
committer Marko Mäkelä <marko.makela@mariadb.com> 2019-11-19 00:07:06 +0200
commit b80df9eba23b4eb9694e770a41135127c6dbc5df (patch)
tree efcdc041750474de31ed8406e7ddc7e9be76478b
parent bd2b05df6c5806e599698eb239b3e44b971eb031 (diff)
download mariadb-git-b80df9eba23b4eb9694e770a41135127c6dbc5df.tar.gz
MDEV-21069 Crash on DROP TABLE if the data file is corrupted

buf_read_ibuf_merge_pages(): Discard any page numbers that are outside the current bounds of the tablespace, by invoking the function ibuf_delete_recs() that was introduced in MDEV-20934. This could avoid an infinite change buffer merge loop on innodb_fast_shutdown=0, because normally the change buffer merge would only be attempted if a page was successfully loaded into the buffer pool.

dict_drop_index_tree(): Add the parameter trx_t*. To prevent the DROP TABLE crash, do not invoke btr_free_if_exists() if the entire .ibd file will be dropped. Thus, we will avoid a crash if the BTR_SEG_LEAF or BTR_SEG_TOP of the index is corrupted, and we will also avoid unnecessarily accessing the to-be-dropped tablespace via the buffer pool.

In MariaDB 10.2, we disable the DROP TABLE fix if innodb_safe_truncate=0, because the backup-unsafe MySQL 5.7 WL#6501 form of TRUNCATE TABLE requires that the individual pages be freed inside the tablespace.

-rw-r--r-- mysql-test/suite/innodb/t/ibuf_not_empty.test | 26
-rw-r--r-- storage/innobase/buf/buf0rea.cc | 33
-rw-r--r-- storage/innobase/dict/dict0crea.cc | 24
-rw-r--r-- storage/innobase/ibuf/ibuf0ibuf.cc | 2
-rw-r--r-- storage/innobase/include/dict0crea.h | 10
-rw-r--r-- storage/innobase/include/ibuf0ibuf.h | 8
-rw-r--r-- storage/innobase/row/row0trunc.cc | 20
-rw-r--r-- storage/innobase/row/row0uins.cc | 3
-rw-r--r-- storage/innobase/row/row0upd.cc | 8
9 files changed, 90 insertions, 44 deletions
diff --git a/mysql-test/suite/innodb/t/ibuf_not_empty.test b/mysql-test/suite/innodb/t/ibuf_not_empty.test
index 8b16d197e03..a5093892b35 100644
--- a/mysql-test/suite/innodb/t/ibuf_not_empty.test
+++ b/mysql-test/suite/innodb/t/ibuf_not_empty.test
@@ -73,8 +73,32 @@ EOF
--replace_regex /contains \d+ entries/contains #### entries/
check table t1;
+--source include/shutdown_mysqld.inc
+
+# Truncate the file to 5 pages, as if it were empty
+perl;
+do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
+my $file = "$ENV{MYSQLD_DATADIR}/test/t1.ibd";
+open(FILE, "+<$file") || die "Unable to open $file";
+binmode FILE;
+my $ps= $ENV{PAGE_SIZE};
+my $pages=5;
+my $page;
+die "Unable to read $file" unless sysread(FILE, $page, $ps) == $ps;
+substr($page,46,4)=pack("N", $pages);
+my $polynomial = 0x82f63b78; # CRC-32C
+my $ck= pack("N",mycrc32(substr($page, 4, 22), 0, $polynomial) ^
+ mycrc32(substr($page, 38, $ps - 38 - 8), 0, $polynomial));
+substr($page,0,4)=$ck;
+substr($page,$ps-8,4)=$ck;
+sysseek(FILE, 0, 0) || die "Unable to rewind $file\n";
+syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
+truncate(FILE, $ps * $pages);
+close(FILE) || die "Unable to close $file";
+EOF
+
--let $restart_parameters=
---source include/restart_mysqld.inc
+--source include/start_mysqld.inc
SET GLOBAL innodb_fast_shutdown=0;
--source include/restart_mysqld.inc
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 188d0aa24b6..e25c0e853e4 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -819,11 +819,8 @@ buf_read_ibuf_merge_pages(
#endif
for (ulint i = 0; i < n_stored; i++) {
- bool found;
- const page_size_t page_size(fil_space_get_page_size(
- space_ids[i], &found));
-
- if (!found) {
+ fil_space_t* space = fil_space_acquire_silent(space_ids[i]);
+ if (!space) {
tablespace_deleted:
/* The tablespace was not found: remove all
entries for it */
@@ -835,6 +832,19 @@ tablespace_deleted:
continue;
}
+ if (UNIV_UNLIKELY(page_nos[i] >= space->size)) {
+ do {
+ ibuf_delete_recs(page_id_t(space_ids[i],
+ page_nos[i]));
+ } while (++i < n_stored
+ && space_ids[i - 1] == space_ids[i]
+ && page_nos[i] >= space->size);
+ i--;
+next:
+ fil_space_release(space);
+ continue;
+ }
+
const page_id_t page_id(space_ids[i], page_nos[i]);
buf_pool_t* buf_pool = buf_pool_get(page_id);
@@ -849,8 +859,8 @@ tablespace_deleted:
buf_read_page_low(&err,
sync && (i + 1 == n_stored),
0,
- BUF_READ_ANY_PAGE, page_id, page_size,
- true, true /* ignore_missing_space */);
+ BUF_READ_ANY_PAGE, page_id,
+ page_size_t(space->flags), true);
switch(err) {
case DB_SUCCESS:
@@ -858,15 +868,20 @@ tablespace_deleted:
case DB_ERROR:
break;
case DB_TABLESPACE_DELETED:
+ fil_space_release(space);
goto tablespace_deleted;
case DB_PAGE_CORRUPTED:
case DB_DECRYPTION_FAILED:
- ib::error() << "Failed to read or decrypt " << page_id
- << " for change buffer merge";
+ ib::error() << "Failed to read or decrypt page "
+ << page_nos[i]
+ << " of '" << space->chain.start->name
+ << "' for change buffer merge";
break;
default:
ut_error;
}
+
+ goto next;
}
os_aio_simulated_wake_handler_threads();
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index 0687875211a..4ef87dae22d 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -954,17 +954,13 @@ dict_create_index_tree_in_mem(
/** Drop the index tree associated with a row in SYS_INDEXES table.
@param[in,out] rec SYS_INDEXES record
@param[in,out] pcur persistent cursor on rec
+@param[in,out] trx dictionary transaction
@param[in,out] mtr mini-transaction
@return whether freeing the B-tree was attempted */
-bool
-dict_drop_index_tree(
- rec_t* rec,
- btr_pcur_t* pcur,
- mtr_t* mtr)
+bool dict_drop_index_tree(rec_t* rec, btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
{
const byte* ptr;
ulint len;
- ulint space;
ulint root_page_no;
ut_ad(mutex_own(&dict_sys->mutex));
@@ -991,7 +987,15 @@ dict_drop_index_tree(
ut_ad(len == 4);
- space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
+ const uint32_t space_id = mach_read_from_4(ptr);
+ ut_ad(space_id < SRV_TMP_SPACE_ID);
+ if (space_id != TRX_SYS_SPACE
+ && srv_safe_truncate
+ && trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
+ /* We are about to delete the entire .ibd file;
+ do not bother to free pages inside it. */
+ return false;
+ }
ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__ID, &len);
@@ -999,7 +1003,7 @@ dict_drop_index_tree(
ut_ad(len == 8);
bool found;
- const page_size_t page_size(fil_space_get_page_size(space,
+ const page_size_t page_size(fil_space_get_page_size(space_id,
&found));
if (!found) {
@@ -1012,11 +1016,11 @@ dict_drop_index_tree(
/* If tablespace is scheduled for truncate, do not try to drop
the indexes in that tablespace. There is a truncate fixup action
which will take care of it. */
- if (srv_is_tablespace_truncated(space)) {
+ if (srv_is_tablespace_truncated(space_id)) {
return(false);
}
- btr_free_if_exists(page_id_t(space, root_page_no), page_size,
+ btr_free_if_exists(page_id_t(space_id, root_page_no), page_size,
mach_read_from_8(ptr), mtr);
return(true);
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index ce6bfe02351..7bf00eaa7c8 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -4333,7 +4333,7 @@ This prevents an infinite loop on slow shutdown
in the case where the change buffer bitmap claims that no buffered
changes exist, while entries exist in the change buffer tree.
@param page_id page number for which there should be no unbuffered changes */
-ATTRIBUTE_COLD static void ibuf_delete_recs(const page_id_t page_id)
+ATTRIBUTE_COLD void ibuf_delete_recs(const page_id_t page_id)
{
ulint dops[IBUF_OP_COUNT];
mtr_t mtr;
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 67afa099ccc..7d87686e010 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, 2018, MariaDB Corporation.
+Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -121,13 +121,11 @@ dict_recreate_index_tree(
/** Drop the index tree associated with a row in SYS_INDEXES table.
@param[in,out] rec SYS_INDEXES record
@param[in,out] pcur persistent cursor on rec
+@param[in,out] trx dictionary transaction
@param[in,out] mtr mini-transaction
@return whether freeing the B-tree was attempted */
-bool
-dict_drop_index_tree(
- rec_t* rec,
- btr_pcur_t* pcur,
- mtr_t* mtr);
+bool dict_drop_index_tree(rec_t* rec, btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index 09dc61496f3..a69b63ee16b 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -326,6 +326,14 @@ ibuf_insert(
const page_size_t& page_size,
que_thr_t* thr);
+/**
+Delete any buffered entries for a page.
+This prevents an infinite loop on slow shutdown
+in the case where the change buffer bitmap claims that no buffered
+changes exist, while entries exist in the change buffer tree.
+@param page_id page number for which there should be no unbuffered changes */
+ATTRIBUTE_COLD void ibuf_delete_recs(const page_id_t page_id);
+
/** When an index page is read from a disk to the buffer pool, this function
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc
index 648a83f7dc6..5e512c602e6 100644
--- a/storage/innobase/row/row0trunc.cc
+++ b/storage/innobase/row/row0trunc.cc
@@ -748,14 +748,10 @@ public:
Constructor
@param[in,out] table Table to truncate
+ @param[in,out] trx dictionary transaction
@param[in] noredo whether to disable redo logging */
- DropIndex(dict_table_t* table, bool noredo)
- :
- Callback(table->id, noredo),
- m_table(table)
- {
- /* No op */
- }
+ DropIndex(dict_table_t* table, trx_t* trx, bool noredo)
+ : Callback(table->id, noredo), m_trx(trx), m_table(table) {}
/**
@param mtr mini-transaction covering the read
@@ -764,8 +760,10 @@ public:
dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;
private:
+ /** dictionary transaction */
+ trx_t* const m_trx;
/** Table to be truncated */
- dict_table_t* m_table;
+ dict_table_t* const m_table;
};
/** Callback to create the indexes during TRUNCATE */
@@ -907,7 +905,7 @@ DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
{
rec_t* rec = btr_pcur_get_rec(pcur);
- bool freed = dict_drop_index_tree(rec, pcur, mtr);
+ bool freed = dict_drop_index_tree(rec, pcur, m_trx, mtr);
#ifdef UNIV_DEBUG
{
@@ -1122,7 +1120,7 @@ row_truncate_rollback(
it can be recovered using drop/create sequence. */
dict_table_x_lock_indexes(table);
- DropIndex dropIndex(table, no_redo);
+ DropIndex dropIndex(table, trx, no_redo);
SysIndexIterator().for_each(dropIndex);
@@ -1936,7 +1934,7 @@ dberr_t row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx)
indexes) */
if (!dict_table_is_temporary(table)) {
- DropIndex dropIndex(table, no_redo);
+ DropIndex dropIndex(table, trx, no_redo);
err = SysIndexIterator().for_each(dropIndex);
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 62a6f013255..7d0664006bb 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -126,7 +126,8 @@ row_undo_ins_remove_clust_rec(
ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
dict_drop_index_tree(
- btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr);
+ btr_pcur_get_rec(&node->pcur), &node->pcur, node->trx,
+ &mtr);
mtr.commit();
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index db4b7b18269..0153d618ab8 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -3092,9 +3092,7 @@ row_upd_clust_step(
ulint mode;
- DEBUG_SYNC_C_IF_THD(
- thr_get_trx(thr)->mysql_thd,
- "innodb_row_upd_clust_step_enter");
+ DEBUG_SYNC_C_IF_THD(trx->mysql_thd, "innodb_row_upd_clust_step_enter");
if (dict_index_is_online_ddl(index)) {
ut_ad(node->table->id != DICT_INDEXES_ID);
@@ -3123,7 +3121,7 @@ row_upd_clust_step(
ut_ad(!dict_index_is_online_ddl(index));
dict_drop_index_tree(
- btr_pcur_get_rec(pcur), pcur, &mtr);
+ btr_pcur_get_rec(pcur), pcur, trx, &mtr);
mtr.commit();
@@ -3155,7 +3153,7 @@ row_upd_clust_step(
}
}
- ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table,
+ ut_ad(lock_trx_has_rec_x_lock(trx, index->table,
btr_pcur_get_block(pcur),
page_rec_get_heap_no(rec)));