summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2020-12-11 09:05:26 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2020-12-11 09:05:26 +0200
commit8677c14e65f436db00be5aedb1f644fcffc70f7e (patch)
tree45f9212af72f0163198822f175956259318beb02
parent0c7c449267655ed759f223067c5095d7df3665b3 (diff)
downloadmariadb-git-bb-10.5-MDEV-24391.tar.gz
MDEV-24391 heap-use-after-free in fil_space_t::flush_low()bb-10.5-MDEV-24391
We observed a race condition that involved two threads executing fil_flush_file_spaces() and one thread executing fil_delete_tablespace(). After one of the fil_flush_file_spaces() observed that space.needs_flush_not_stopping() is set and was releasing the fil_system.mutex, the other fil_flush_file_spaces() would complete the execution of fil_space_t::flush_low() on the same tablespace. Then, fil_delete_tablespace() would destroy the object, because the value of fil_space_t::n_pending did not prevent that. Finally, the fil_flush_file_spaces() would resume execution and invoke fil_space_t::flush_low() on the freed object. This race condition was introduced in commit 118e258aaac5da75a2ac4556201aaea3688fac67 of MDEV-23855. fil_space_t::flush(): Add a template parameter that indicates whether the caller is holding a reference to prevent the tablespace from being freed. buf_dblwr_t::flush_buffered_writes_completed(), row_quiesce_table_start(): Acquire a reference for the duration of the fil_space_t::flush_low() operation. It should be impossible for the object to be freed in these code paths, but we want to satisfy the debug assertions. fil_space_t::flush_low(): Do not increment or decrement the reference count, but instead assert that the caller is holding a reference. fil_space_extend_must_retry(), fil_flush_file_spaces(): Acquire a reference before releasing fil_system.mutex. This is what will fix the race condition.
-rw-r--r--extra/mariabackup/xtrabackup.cc2
-rw-r--r--storage/innobase/buf/buf0dblwr.cc2
-rw-r--r--storage/innobase/fil/fil0fil.cc17
-rw-r--r--storage/innobase/include/fil0fil.h13
-rw-r--r--storage/innobase/row/row0quiesce.cc2
5 files changed, 21 insertions, 15 deletions
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index d44e212d58a..94b10019e1d 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -552,7 +552,7 @@ void CorruptedPages::zero_out_free_pages()
*page_it, space_name.c_str());
}
}
- space->flush();
+ space->flush<true>();
space->release();
}
m_spaces.swap(non_free_pages);
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index 4e33bc58b53..6a4a6ced074 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -648,7 +648,7 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
mysql_mutex_unlock(&mutex);
/* Now flush the doublewrite buffer data to disk */
- fil_system.sys_space->flush();
+ fil_system.sys_space->flush<false>();
/* The writes have been flushed to disk now and in recovery we will
find them in the doublewrite buffer blocks. Next, write the data pages. */
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index b3d7aa2568b..2734d6bc68d 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -496,18 +496,16 @@ void fil_space_t::flush_low()
{
ut_ad(!mutex_own(&fil_system.mutex));
- uint32_t n= 0;
- while (!n_pending.compare_exchange_strong(n, (n + 1) | NEEDS_FSYNC,
+ uint32_t n= 1;
+ while (!n_pending.compare_exchange_strong(n, n | NEEDS_FSYNC,
std::memory_order_acquire,
std::memory_order_relaxed))
{
+ ut_ad(n & PENDING);
if (n & STOPPING)
return;
- if (!(n & NEEDS_FSYNC))
- continue;
- if (acquire_low() & STOPPING)
- return;
- break;
+ if (n & NEEDS_FSYNC)
+ break;
}
fil_n_pending_tablespace_flushes++;
@@ -535,7 +533,6 @@ void fil_space_t::flush_low()
}
clear_flush();
- release();
fil_n_pending_tablespace_flushes--;
}
@@ -632,8 +629,10 @@ fil_space_extend_must_retry(
case TRX_SYS_SPACE:
srv_sys_space.set_last_file_size(pages_in_MiB);
do_flush:
+ space->reacquire();
mutex_exit(&fil_system.mutex);
space->flush_low();
+ space->release();
mutex_enter(&fil_system.mutex);
break;
default:
@@ -3516,8 +3515,10 @@ rescan:
{
if (space.needs_flush_not_stopping())
{
+ space.reacquire();
mutex_exit(&fil_system.mutex);
space.flush_low();
+ space.release();
goto rescan;
}
}
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index e645ce31232..79dd6933056 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -972,7 +972,7 @@ public:
fil_io_t io(const IORequest &type, os_offset_t offset, size_t len,
void *buf, buf_page_t *bpage= nullptr);
/** Flush pending writes from the file system cache to the file. */
- inline void flush();
+ template<bool have_reference> inline void flush();
/** Flush pending writes from the file system cache to the file. */
void flush_low();
@@ -1452,18 +1452,23 @@ inline void fil_space_t::set_stopping(bool stopping)
}
/** Flush pending writes from the file system cache to the file. */
-inline void fil_space_t::flush()
+template<bool have_reference> inline void fil_space_t::flush()
{
ut_ad(!mutex_own(&fil_system.mutex));
-
+ ut_ad(!have_reference || (pending() & PENDING));
ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT);
if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)
{
ut_ad(!is_in_unflushed_spaces);
ut_ad(!needs_flush());
}
- else
+ else if (have_reference)
+ flush_low();
+ else if (!(acquire_low() & STOPPING))
+ {
flush_low();
+ release();
+ }
}
/** @return the size in pages (0 if unreadable) */
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 0bdf52dfd56..8e0581dfa9b 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -545,7 +545,7 @@ row_quiesce_table_start(
if (!trx_is_interrupted(trx)) {
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
- table->space->flush();
+ table->space->flush<false>();
if (row_quiesce_write_cfg(table, trx->mysql_thd)
!= DB_SUCCESS) {