From f59a1826f81e3246735c2ca61daec794170b61d1 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Wed, 29 Nov 2017 22:56:23 +0000 Subject: MDEV-14536 : during backup, retry read of log blocks, if there is (possibly intermittent) checksum mismatch. --- extra/mariabackup/xtrabackup.cc | 11 ++++- .../suite/mariabackup/log_checksum_mismatch.result | 14 ++++++ .../suite/mariabackup/log_checksum_mismatch.test | 32 +++++++++++++ storage/innobase/include/log0recv.h | 9 ++-- storage/innobase/log/log0recv.cc | 56 +++++++++++++--------- 5 files changed, 94 insertions(+), 28 deletions(-) create mode 100644 mysql-test/suite/mariabackup/log_checksum_mismatch.result create mode 100644 mysql-test/suite/mariabackup/log_checksum_mismatch.test diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index c0b79f01cd5..90a0c829773 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2401,8 +2401,15 @@ xtrabackup_copy_logfile(copy_logfile copy) log_mutex_enter(); - lsn_t lsn = log_group_read_log_seg(log_sys->buf, &log_sys->log, - start_lsn, end_lsn); + lsn_t lsn= start_lsn; + for(int retries= 0; retries < 100; retries++) { + if (log_group_read_log_seg(log_sys->buf, &log_sys->log, + &lsn, end_lsn)){ + break; + } + msg("Retrying read of a redo log block"); + my_sleep(1000); + } start_lsn = xtrabackup_copy_log(copy, start_lsn, lsn); diff --git a/mysql-test/suite/mariabackup/log_checksum_mismatch.result b/mysql-test/suite/mariabackup/log_checksum_mismatch.result new file mode 100644 index 00000000000..806a5e62cb6 --- /dev/null +++ b/mysql-test/suite/mariabackup/log_checksum_mismatch.result @@ -0,0 +1,14 @@ +CREATE TABLE t(i INT) ENGINE INNODB; +INSERT INTO t VALUES(1); +# xtrabackup backup +FOUND 1 /Invalid log block checksum/ in backup.log +INSERT INTO t VALUES(2); +# xtrabackup prepare +# shutdown server +# remove datadir +# xtrabackup move back +# restart server +SELECT * FROM t; +i +1 +DROP TABLE t; diff --git a/mysql-test/suite/mariabackup/log_checksum_mismatch.test b/mysql-test/suite/mariabackup/log_checksum_mismatch.test new file mode 100644 index 00000000000..f041b0aeedd --- /dev/null +++ b/mysql-test/suite/mariabackup/log_checksum_mismatch.test @@ -0,0 +1,32 @@ +--source include/have_debug.inc + +CREATE TABLE t(i INT) ENGINE INNODB; +INSERT INTO t VALUES(1); +echo # xtrabackup backup; +let $targetdir=$MYSQLTEST_VARDIR/tmp/backup; +let $backuplog=$MYSQLTEST_VARDIR/tmp/backup.log; + +--disable_result_log +exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --dbug=+d,log_intermittent_checksum_mismatch > $backuplog; +--enable_result_log + +--let SEARCH_RANGE = 10000000 +--let SEARCH_PATTERN=Invalid log block checksum +--let SEARCH_FILE=$backuplog +--source include/search_pattern_in_file.inc +remove_file $backuplog; + + +INSERT INTO t VALUES(2); + + +echo # xtrabackup prepare; +--disable_result_log +exec $XTRABACKUP --prepare --target-dir=$targetdir; +-- source include/restart_and_restore.inc +--enable_result_log + +SELECT * FROM t; +DROP TABLE t; +rmdir $targetdir; + diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 24ad9ae2a30..29784a59d95 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -99,14 +99,15 @@ recv_sys_debug_free(void); /** Read a log segment to a buffer. @param[out] buf buffer @param[in] group redo log files -@param[in] start_lsn read area start +@param[in, out] start_lsn in : read area start, out: the last read valid lsn @param[in] end_lsn read area end -@return valid end_lsn */ -lsn_t +@param[out] invalid_block - invalid, (maybe incompletely written) block encountered +@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */ +bool log_group_read_log_seg( byte* buf, const log_group_t* group, - lsn_t start_lsn, + lsn_t* start_lsn, lsn_t end_lsn); /********************************************************//** diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 2d707820f4a..2e967a99121 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -608,28 +608,29 @@ recv_sys_debug_free(void) /** Read a log segment to a buffer. @param[out] buf buffer @param[in] group redo log files -@param[in] start_lsn read area start +@param[in, out] start_lsn in : read area start, out: the last read valid lsn @param[in] end_lsn read area end -@return valid end_lsn */ -lsn_t +@param[out] invalid_block - invalid, (maybe incompletely written) block encountered +@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */ +bool log_group_read_log_seg( byte* buf, const log_group_t* group, - lsn_t start_lsn, + lsn_t *start_lsn, lsn_t end_lsn) { ulint len; lsn_t source_offset; - + bool success = true; ut_ad(log_mutex_own()); - ut_ad(!(start_lsn % OS_FILE_LOG_BLOCK_SIZE)); + ut_ad(!(*start_lsn % OS_FILE_LOG_BLOCK_SIZE)); ut_ad(!(end_lsn % OS_FILE_LOG_BLOCK_SIZE)); loop: - source_offset = log_group_calc_lsn_offset(start_lsn, group); + source_offset = log_group_calc_lsn_offset(*start_lsn, group); - ut_a(end_lsn - start_lsn <= ULINT_MAX); - len = (ulint) (end_lsn - start_lsn); + ut_a(end_lsn - *start_lsn <= ULINT_MAX); + len = (ulint) (end_lsn - *start_lsn); ut_ad(len != 0); @@ -659,16 +660,16 @@ loop: for (ulint l = 0; l < len; l += OS_FILE_LOG_BLOCK_SIZE, buf += OS_FILE_LOG_BLOCK_SIZE, - start_lsn += OS_FILE_LOG_BLOCK_SIZE) { + (*start_lsn) += OS_FILE_LOG_BLOCK_SIZE) { const ulint block_number = log_block_get_hdr_no(buf); - if (block_number != log_block_convert_lsn_to_no(start_lsn)) { + if (block_number != log_block_convert_lsn_to_no(*start_lsn)) { /* Garbage or an incompletely written log block. We will not report any error, because this can happen when InnoDB was killed while it was writing redo log. We simply treat this as an abrupt end of the redo log. */ - end_lsn = start_lsn; + end_lsn = *start_lsn; break; } @@ -676,6 +677,13 @@ loop: ulint crc = log_block_calc_checksum_crc32(buf); ulint cksum = log_block_get_checksum(buf); + DBUG_EXECUTE_IF("log_intermittent_checksum_mismatch", { + static int block_counter; + if (block_counter++ == 0) { + cksum = crc + 1; + } + }); + if (crc != cksum) { ib::error() << "Invalid log block checksum." << " block: " << block_number @@ -683,30 +691,33 @@ loop: << log_block_get_checkpoint_no(buf) << " expected: " << crc << " found: " << cksum; - end_lsn = start_lsn; + end_lsn = *start_lsn; + success = false; break; } if (group->is_encrypted()) { - log_crypt(buf, start_lsn, + log_crypt(buf, *start_lsn, OS_FILE_LOG_BLOCK_SIZE, true); } } } if (recv_sys->report(ut_time())) { - ib::info() << "Read redo log up to LSN=" << start_lsn; + ib::info() << "Read redo log up to LSN=" << *start_lsn; sd_notifyf(0, "STATUS=Read redo log up to LSN=" LSN_PF, - start_lsn); + *start_lsn); } - if (start_lsn != end_lsn) { + if (*start_lsn != end_lsn) { goto loop; } - return(start_lsn); + return(success); } + + /********************************************************//** Copies a log segment from the most up-to-date log group to the other log groups, so that they all contain the latest log data. Also writes the info @@ -721,10 +732,10 @@ recv_synchronize_groups() /* Read the last recovered log block to the recovery system buffer: the block is always incomplete */ - const lsn_t start_lsn = ut_uint64_align_down(recovered_lsn, + lsn_t start_lsn = ut_uint64_align_down(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE); log_group_read_log_seg(log_sys->buf, &log_sys->log, - start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE); + &start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE); /* Update the fields in the group struct to correspond to recovered_lsn */ @@ -2903,8 +2914,9 @@ recv_group_scan_log_recs( start_lsn = ut_uint64_align_down(end_lsn, OS_FILE_LOG_BLOCK_SIZE); - end_lsn = log_group_read_log_seg( - log_sys->buf, group, start_lsn, + end_lsn = start_lsn; + log_group_read_log_seg( + log_sys->buf, group, &end_lsn, start_lsn + RECV_SCAN_SIZE); } while (end_lsn != start_lsn && !recv_scan_log_recs( -- cgit v1.2.1