diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-01-04 07:40:31 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-01-04 07:40:31 +0200 |
commit | 4c3ad24413f234599eda27f4958dd3ff21df3203 (patch) | |
tree | 11a4789464111628d22cdb189d1797e0e4ec2161 /storage/innobase/srv | |
parent | eab89f14ab0069a3f74e80bdf13aa70d85738c79 (diff) | |
download | mariadb-git-4c3ad24413f234599eda27f4958dd3ff21df3203.tar.gz |
MDEV-27416 InnoDB hang in buf_flush_wait_flushed(), on log checkpointbb-10.5-MDEV-27416
InnoDB could sometimes hang when triggering a log checkpoint. This is
due to commit 7b1252c03d7131754d9503560fe507b33ca1f8b4 (MDEV-24278),
which introduced an untimed wait to buf_flush_page_cleaner().
The hang was noticed by occasional failures of IMPORT TABLESPACE tests,
such as innodb.innodb-wl5522, which would (unnecessarily) invoke
log_make_checkpoint() from row_import_cleanup().
The reason of the hang was that buf_flush_page_cleaner() would enter
untimed sleep despite buf_flush_sync_lsn being set. The exact failure
scenario is unclear, because buf_flush_sync_lsn should actually be
protected by buf_pool.flush_list_mutex. We prevent the hang by
invoking buf_pool.page_cleaner_set_idle(false) whenever we are
setting buf_flush_sync_lsn and signaling buf_pool.do_flush_list.
The bulk of these changes was originally developed as a preparation
for MDEV-26827, to invoke buf_flush_list() from fewer threads,
and tested on 10.6 by Matthias Leich.
This fix was tested by running 100 repetitions of 100 concurrent instances
of the test innodb.innodb-wl5522 on a RelWithDebInfo build, using ext4fs
and innodb_flush_method=O_DIRECT on a SATA SSD with 4096-byte block size.
During the test, the call to log_make_checkpoint() in row_import_cleanup()
was present.
buf_flush_list(): Make static.
buf_flush_wait(): Wait for buf_pool.get_oldest_modification()
to reach a target, by work done in the buf_flush_page_cleaner.
If buf_flush_sync_lsn is going to be set, we will invoke
buf_pool.page_cleaner_set_idle(false).
buf_flush_ahead(): If buf_flush_sync_lsn or buf_flush_async_lsn
is going to be set and the page cleaner woken up, we will invoke
buf_pool.page_cleaner_set_idle(false).
buf_flush_wait_flushed(): Invoke buf_flush_wait().
buf_flush_sync(): Invoke recv_sys.apply() at the start in case
crash recovery is active. Invoke buf_flush_wait().
buf_flush_sync_batch(): A lower-level variant of buf_flush_sync()
that is only called by recv_sys_t::apply().
buf_flush_sync_for_checkpoint(): Do not trigger log apply
or checkpoint during recovery.
buf_dblwr_t::create(): Only initiate a buffer pool flush, not
a checkpoint.
row_import_cleanup(): Do not unnecessarily invoke log_make_checkpoint().
Invoking buf_flush_list_space() before starting to generate redo log
for the imported tablespace should suffice.
srv_prepare_to_delete_redo_log_file():
Set recv_sys.recovery_on in order to prevent
buf_flush_sync_for_checkpoint() from initiating a checkpoint
while the log is inaccessible. Remove a wait loop that is already
part of buf_flush_sync().
Do not invoke fil_names_clear() if the log is being upgraded,
because the FILE_MODIFY record is specific to the latest format.
create_log_file(): Clear recv_sys.recovery_on only after calling
log_make_checkpoint(), to prevent buf_flush_page_cleaner from
invoking a checkpoint.
innodb_shutdown(): Simplify the logic in mariadb-backup --prepare.
os_aio_wait_until_no_pending_writes(): Update the function comment.
Apart from row_quiesce_table_start() during FLUSH TABLES...FOR EXPORT,
this is being called by buf_flush_list_space(), which is invoked
by ALTER TABLE...IMPORT TABLESPACE as well as some encryption operations.
Diffstat (limited to 'storage/innobase/srv')
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 2 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 149 |
2 files changed, 70 insertions, 81 deletions
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 73e67c12d58..c05562daca3 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -557,7 +557,7 @@ char srv_buffer_pool_load_at_startup = TRUE; #ifdef HAVE_PSI_STAGE_INTERFACE /** Performance schema stage event for monitoring ALTER TABLE progress -everything after flush log_make_checkpoint(). */ +in ha_innobase::commit_inplace_alter_table(). */ PSI_stage_info srv_stage_alter_table_end = {0, "alter table (end)", PSI_FLAG_STAGE_PROGRESS}; diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index cf936893e1b..afb7834c9b9 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -329,6 +329,13 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn, log_sys.log.write_header_durable(lsn); + ut_ad(srv_startup_is_before_trx_rollback_phase); + if (create_new_db) { + srv_startup_is_before_trx_rollback_phase = false; + } + + /* Enable checkpoints in buf_flush_page_cleaner(). */ + recv_sys.recovery_on = false; mysql_mutex_unlock(&log_sys.mutex); log_make_checkpoint(); @@ -895,91 +902,74 @@ buffer pools. Flush the redo log buffer to the redo log file. @return lsn upto which data pages have been flushed. */ static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists) { - DBUG_ENTER("srv_prepare_to_delete_redo_log_file"); + DBUG_ENTER("srv_prepare_to_delete_redo_log_file"); - lsn_t flushed_lsn; - ulint count = 0; + /* Disable checkpoints in the page cleaner. */ + ut_ad(!recv_sys.recovery_on); + recv_sys.recovery_on= true; - if (log_sys.log.subformat != 2) { - srv_log_file_size = 0; - } + /* Clean the buffer pool. */ + buf_flush_sync(); - for (;;) { - /* Clean the buffer pool. */ - buf_flush_sync(); + if (log_sys.log.subformat != 2) + srv_log_file_size= 0; - DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0);); - DBUG_PRINT("ib_log", ("After innodb_log_abort_1")); + DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0);); + DBUG_PRINT("ib_log", ("After innodb_log_abort_1")); - mysql_mutex_lock(&log_sys.mutex); - - fil_names_clear(log_sys.get_lsn(), false); - - flushed_lsn = log_sys.get_lsn(); - - { - ib::info info; - if (srv_log_file_size == 0 - || (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED) - != log_t::FORMAT_10_5) { - info << "Upgrading redo log: "; - } else if (!old_exists - || srv_log_file_size - != srv_log_file_size_requested) { - if (srv_encrypt_log - == (my_bool)log_sys.is_encrypted()) { - info << (srv_encrypt_log - ? "Resizing encrypted" - : "Resizing"); - } else if (srv_encrypt_log) { - info << "Encrypting and resizing"; - } else { - info << "Removing encryption" - " and resizing"; - } - - info << " redo log from " << srv_log_file_size - << " to "; - } else if (srv_encrypt_log) { - info << "Encrypting redo log: "; - } else { - info << "Removing redo log encryption: "; - } - - info << srv_log_file_size_requested - << " bytes; LSN=" << flushed_lsn; - } + mysql_mutex_lock(&log_sys.mutex); + const bool latest_format= (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED) == + log_t::FORMAT_10_5; + lsn_t flushed_lsn= log_sys.get_lsn(); - mysql_mutex_unlock(&log_sys.mutex); + if (latest_format) + { + fil_names_clear(flushed_lsn, false); + flushed_lsn= log_sys.get_lsn(); + } - if (flushed_lsn != log_sys.get_flushed_lsn()) { - log_write_up_to(flushed_lsn, false); - log_sys.log.flush(); - } + { + const char *msg; + if (!latest_format || srv_log_file_size == 0) + { + msg= "Upgrading redo log: "; +same_size: + ib::info() << msg << srv_log_file_size_requested << " bytes; LSN=" + << flushed_lsn; + } + else if (old_exists && srv_log_file_size == srv_log_file_size_requested) + { + msg= srv_encrypt_log + ? "Encrypting redo log: " : "Removing redo log encryption: "; + goto same_size; + } + else + { + if (srv_encrypt_log == (my_bool)log_sys.is_encrypted()) + msg= srv_encrypt_log ? "Resizing encrypted" : "Resizing"; + else + msg= srv_encrypt_log + ? "Encrypting and resizing" + : "Removing encryption and resizing"; + + ib::info() << msg << " redo log from " << srv_log_file_size << " to " + << srv_log_file_size_requested + << " bytes; LSN=" << flushed_lsn; + } + } - ut_ad(flushed_lsn == log_sys.get_lsn()); - - /* Check if the buffer pools are clean. If not - retry till it is clean. */ - if (ulint pending_io = buf_pool.io_pending()) { - count++; - /* Print a message every 60 seconds if we - are waiting to clean the buffer pools */ - if (srv_print_verbose_log && count > 600) { - ib::info() << "Waiting for " - << pending_io << " buffer " - << "page I/Os to complete"; - count = 0; - } + mysql_mutex_unlock(&log_sys.mutex); - os_thread_sleep(100000); - continue; - } + if (flushed_lsn != log_sys.get_flushed_lsn()) + { + log_write_up_to(flushed_lsn, false); + log_sys.log.flush(); + } - break; - } + ut_ad(flushed_lsn == log_sys.get_lsn()); + ut_ad(!buf_pool.any_io_pending()); - DBUG_RETURN(flushed_lsn); + DBUG_RETURN(flushed_lsn); } /** Tries to locate LOG_FILE_NAME and check it's size, etc @@ -1259,7 +1249,7 @@ dberr_t srv_start(bool create_new_db) ut_ad(buf_page_cleaner_is_active); } - srv_startup_is_before_trx_rollback_phase = !create_new_db; + srv_startup_is_before_trx_rollback_phase = true; /* Check if undo tablespaces and redo log files exist before creating a new system tablespace */ @@ -1308,7 +1298,6 @@ dberr_t srv_start(bool create_new_db) if (create_new_db) { flushed_lsn = log_sys.get_lsn(); log_sys.set_flushed_lsn(flushed_lsn); - buf_flush_sync(); err = create_log_file(true, flushed_lsn, logfile0); @@ -1371,6 +1360,9 @@ dberr_t srv_start(bool create_new_db) if (!log_set_capacity(srv_log_file_size_requested)) { return(srv_init_abort(DB_ERROR)); } + + /* Enable checkpoints in the page cleaner. */ + recv_sys.recovery_on = false; } file_checked: @@ -2024,11 +2016,8 @@ void innodb_shutdown() break; case SRV_OPERATION_RESTORE: case SRV_OPERATION_RESTORE_EXPORT: - srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; - if (!buf_page_cleaner_is_active) { - break; - } mysql_mutex_lock(&buf_pool.flush_list_mutex); + srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; while (buf_page_cleaner_is_active) { pthread_cond_signal(&buf_pool.do_flush_list); my_cond_wait(&buf_pool.done_flush_list, |