summaryrefslogtreecommitdiff
path: root/storage/innobase/srv
diff options
context:
space:
mode:
authorMarko Mäkelä <marko.makela@mariadb.com>2022-01-04 07:40:31 +0200
committerMarko Mäkelä <marko.makela@mariadb.com>2022-01-04 07:40:31 +0200
commit4c3ad24413f234599eda27f4958dd3ff21df3203 (patch)
tree11a4789464111628d22cdb189d1797e0e4ec2161 /storage/innobase/srv
parenteab89f14ab0069a3f74e80bdf13aa70d85738c79 (diff)
downloadmariadb-git-4c3ad24413f234599eda27f4958dd3ff21df3203.tar.gz
MDEV-27416 InnoDB hang in buf_flush_wait_flushed(), on log checkpointbb-10.5-MDEV-27416
InnoDB could sometimes hang when triggering a log checkpoint. This is due to commit 7b1252c03d7131754d9503560fe507b33ca1f8b4 (MDEV-24278), which introduced an untimed wait to buf_flush_page_cleaner(). The hang was noticed by occasional failures of IMPORT TABLESPACE tests, such as innodb.innodb-wl5522, which would (unnecessarily) invoke log_make_checkpoint() from row_import_cleanup(). The reason of the hang was that buf_flush_page_cleaner() would enter untimed sleep despite buf_flush_sync_lsn being set. The exact failure scenario is unclear, because buf_flush_sync_lsn should actually be protected by buf_pool.flush_list_mutex. We prevent the hang by invoking buf_pool.page_cleaner_set_idle(false) whenever we are setting buf_flush_sync_lsn and signaling buf_pool.do_flush_list. The bulk of these changes was originally developed as a preparation for MDEV-26827, to invoke buf_flush_list() from fewer threads, and tested on 10.6 by Matthias Leich. This fix was tested by running 100 repetitions of 100 concurrent instances of the test innodb.innodb-wl5522 on a RelWithDebInfo build, using ext4fs and innodb_flush_method=O_DIRECT on a SATA SSD with 4096-byte block size. During the test, the call to log_make_checkpoint() in row_import_cleanup() was present. buf_flush_list(): Make static. buf_flush_wait(): Wait for buf_pool.get_oldest_modification() to reach a target, by work done in the buf_flush_page_cleaner. If buf_flush_sync_lsn is going to be set, we will invoke buf_pool.page_cleaner_set_idle(false). buf_flush_ahead(): If buf_flush_sync_lsn or buf_flush_async_lsn is going to be set and the page cleaner woken up, we will invoke buf_pool.page_cleaner_set_idle(false). buf_flush_wait_flushed(): Invoke buf_flush_wait(). buf_flush_sync(): Invoke recv_sys.apply() at the start in case crash recovery is active. Invoke buf_flush_wait(). buf_flush_sync_batch(): A lower-level variant of buf_flush_sync() that is only called by recv_sys_t::apply(). buf_flush_sync_for_checkpoint(): Do not trigger log apply or checkpoint during recovery. buf_dblwr_t::create(): Only initiate a buffer pool flush, not a checkpoint. row_import_cleanup(): Do not unnecessarily invoke log_make_checkpoint(). Invoking buf_flush_list_space() before starting to generate redo log for the imported tablespace should suffice. srv_prepare_to_delete_redo_log_file(): Set recv_sys.recovery_on in order to prevent buf_flush_sync_for_checkpoint() from initiating a checkpoint while the log is inaccessible. Remove a wait loop that is already part of buf_flush_sync(). Do not invoke fil_names_clear() if the log is being upgraded, because the FILE_MODIFY record is specific to the latest format. create_log_file(): Clear recv_sys.recovery_on only after calling log_make_checkpoint(), to prevent buf_flush_page_cleaner from invoking a checkpoint. innodb_shutdown(): Simplify the logic in mariadb-backup --prepare. os_aio_wait_until_no_pending_writes(): Update the function comment. Apart from row_quiesce_table_start() during FLUSH TABLES...FOR EXPORT, this is being called by buf_flush_list_space(), which is invoked by ALTER TABLE...IMPORT TABLESPACE as well as some encryption operations.
Diffstat (limited to 'storage/innobase/srv')
-rw-r--r--storage/innobase/srv/srv0srv.cc2
-rw-r--r--storage/innobase/srv/srv0start.cc149
2 files changed, 70 insertions, 81 deletions
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index 73e67c12d58..c05562daca3 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -557,7 +557,7 @@ char srv_buffer_pool_load_at_startup = TRUE;
#ifdef HAVE_PSI_STAGE_INTERFACE
/** Performance schema stage event for monitoring ALTER TABLE progress
-everything after flush log_make_checkpoint(). */
+in ha_innobase::commit_inplace_alter_table(). */
PSI_stage_info srv_stage_alter_table_end
= {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS};
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index cf936893e1b..afb7834c9b9 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -329,6 +329,13 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
log_sys.log.write_header_durable(lsn);
+ ut_ad(srv_startup_is_before_trx_rollback_phase);
+ if (create_new_db) {
+ srv_startup_is_before_trx_rollback_phase = false;
+ }
+
+ /* Enable checkpoints in buf_flush_page_cleaner(). */
+ recv_sys.recovery_on = false;
mysql_mutex_unlock(&log_sys.mutex);
log_make_checkpoint();
@@ -895,91 +902,74 @@ buffer pools. Flush the redo log buffer to the redo log file.
@return lsn upto which data pages have been flushed. */
static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists)
{
- DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
+ DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
- lsn_t flushed_lsn;
- ulint count = 0;
+ /* Disable checkpoints in the page cleaner. */
+ ut_ad(!recv_sys.recovery_on);
+ recv_sys.recovery_on= true;
- if (log_sys.log.subformat != 2) {
- srv_log_file_size = 0;
- }
+ /* Clean the buffer pool. */
+ buf_flush_sync();
- for (;;) {
- /* Clean the buffer pool. */
- buf_flush_sync();
+ if (log_sys.log.subformat != 2)
+ srv_log_file_size= 0;
- DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0););
- DBUG_PRINT("ib_log", ("After innodb_log_abort_1"));
+ DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0););
+ DBUG_PRINT("ib_log", ("After innodb_log_abort_1"));
- mysql_mutex_lock(&log_sys.mutex);
-
- fil_names_clear(log_sys.get_lsn(), false);
-
- flushed_lsn = log_sys.get_lsn();
-
- {
- ib::info info;
- if (srv_log_file_size == 0
- || (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED)
- != log_t::FORMAT_10_5) {
- info << "Upgrading redo log: ";
- } else if (!old_exists
- || srv_log_file_size
- != srv_log_file_size_requested) {
- if (srv_encrypt_log
- == (my_bool)log_sys.is_encrypted()) {
- info << (srv_encrypt_log
- ? "Resizing encrypted"
- : "Resizing");
- } else if (srv_encrypt_log) {
- info << "Encrypting and resizing";
- } else {
- info << "Removing encryption"
- " and resizing";
- }
-
- info << " redo log from " << srv_log_file_size
- << " to ";
- } else if (srv_encrypt_log) {
- info << "Encrypting redo log: ";
- } else {
- info << "Removing redo log encryption: ";
- }
-
- info << srv_log_file_size_requested
- << " bytes; LSN=" << flushed_lsn;
- }
+ mysql_mutex_lock(&log_sys.mutex);
+ const bool latest_format= (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED) ==
+ log_t::FORMAT_10_5;
+ lsn_t flushed_lsn= log_sys.get_lsn();
- mysql_mutex_unlock(&log_sys.mutex);
+ if (latest_format)
+ {
+ fil_names_clear(flushed_lsn, false);
+ flushed_lsn= log_sys.get_lsn();
+ }
- if (flushed_lsn != log_sys.get_flushed_lsn()) {
- log_write_up_to(flushed_lsn, false);
- log_sys.log.flush();
- }
+ {
+ const char *msg;
+ if (!latest_format || srv_log_file_size == 0)
+ {
+ msg= "Upgrading redo log: ";
+same_size:
+ ib::info() << msg << srv_log_file_size_requested << " bytes; LSN="
+ << flushed_lsn;
+ }
+ else if (old_exists && srv_log_file_size == srv_log_file_size_requested)
+ {
+ msg= srv_encrypt_log
+ ? "Encrypting redo log: " : "Removing redo log encryption: ";
+ goto same_size;
+ }
+ else
+ {
+ if (srv_encrypt_log == (my_bool)log_sys.is_encrypted())
+ msg= srv_encrypt_log ? "Resizing encrypted" : "Resizing";
+ else
+ msg= srv_encrypt_log
+ ? "Encrypting and resizing"
+ : "Removing encryption and resizing";
+
+ ib::info() << msg << " redo log from " << srv_log_file_size << " to "
+ << srv_log_file_size_requested
+ << " bytes; LSN=" << flushed_lsn;
+ }
+ }
- ut_ad(flushed_lsn == log_sys.get_lsn());
-
- /* Check if the buffer pools are clean. If not
- retry till it is clean. */
- if (ulint pending_io = buf_pool.io_pending()) {
- count++;
- /* Print a message every 60 seconds if we
- are waiting to clean the buffer pools */
- if (srv_print_verbose_log && count > 600) {
- ib::info() << "Waiting for "
- << pending_io << " buffer "
- << "page I/Os to complete";
- count = 0;
- }
+ mysql_mutex_unlock(&log_sys.mutex);
- os_thread_sleep(100000);
- continue;
- }
+ if (flushed_lsn != log_sys.get_flushed_lsn())
+ {
+ log_write_up_to(flushed_lsn, false);
+ log_sys.log.flush();
+ }
- break;
- }
+ ut_ad(flushed_lsn == log_sys.get_lsn());
+ ut_ad(!buf_pool.any_io_pending());
- DBUG_RETURN(flushed_lsn);
+ DBUG_RETURN(flushed_lsn);
}
/** Tries to locate LOG_FILE_NAME and check it's size, etc
@@ -1259,7 +1249,7 @@ dberr_t srv_start(bool create_new_db)
ut_ad(buf_page_cleaner_is_active);
}
- srv_startup_is_before_trx_rollback_phase = !create_new_db;
+ srv_startup_is_before_trx_rollback_phase = true;
/* Check if undo tablespaces and redo log files exist before creating
a new system tablespace */
@@ -1308,7 +1298,6 @@ dberr_t srv_start(bool create_new_db)
if (create_new_db) {
flushed_lsn = log_sys.get_lsn();
log_sys.set_flushed_lsn(flushed_lsn);
- buf_flush_sync();
err = create_log_file(true, flushed_lsn, logfile0);
@@ -1371,6 +1360,9 @@ dberr_t srv_start(bool create_new_db)
if (!log_set_capacity(srv_log_file_size_requested)) {
return(srv_init_abort(DB_ERROR));
}
+
+ /* Enable checkpoints in the page cleaner. */
+ recv_sys.recovery_on = false;
}
file_checked:
@@ -2024,11 +2016,8 @@ void innodb_shutdown()
break;
case SRV_OPERATION_RESTORE:
case SRV_OPERATION_RESTORE_EXPORT:
- srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
- if (!buf_page_cleaner_is_active) {
- break;
- }
mysql_mutex_lock(&buf_pool.flush_list_mutex);
+ srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
while (buf_page_cleaner_is_active) {
pthread_cond_signal(&buf_pool.do_flush_list);
my_cond_wait(&buf_pool.done_flush_list,