diff options
Diffstat (limited to 'storage/xtradb/log/log0online.c')
-rw-r--r-- | storage/xtradb/log/log0online.c | 1085 |
1 files changed, 1085 insertions, 0 deletions
diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c new file mode 100644 index 00000000000..1d478c467e6 --- /dev/null +++ b/storage/xtradb/log/log0online.c @@ -0,0 +1,1085 @@ +/***************************************************************************** + +Copyright (c) 2011-2012 Percona Inc. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/**************************************************//** +@file log/log0online.c +Online database log parsing for changed page tracking + +*******************************************************/ + +#include "log0online.h" + +#include "my_dbug.h" + +#include "log0recv.h" +#include "mach0data.h" +#include "mtr0log.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "trx0sys.h" +#include "ut0rbt.h" + +enum { FOLLOW_SCAN_SIZE = 4 * (UNIV_PAGE_SIZE_MAX) }; + +/** Log parsing and bitmap output data structure */ +struct log_bitmap_struct { + byte read_buf[FOLLOW_SCAN_SIZE]; + /*!< log read buffer */ + byte parse_buf[RECV_PARSING_BUF_SIZE]; + /*!< log parse buffer */ + byte* parse_buf_end; /*!< parse buffer position where the + next read log data should be copied to. + If the previous log records were fully + parsed, it points to the start, + otherwise points immediatelly past the + end of the incomplete log record. */ + char* out_name; /*!< the file name for bitmap output */ + os_file_t out; /*!< the bitmap output file */ + ib_uint64_t out_offset; /*!< the next write position in the + bitmap output file */ + ib_uint64_t start_lsn; /*!< the LSN of the next unparsed + record and the start of the next LSN + interval to be parsed. */ + ib_uint64_t end_lsn; /*!< the end of the LSN interval to be + parsed, equal to the next checkpoint + LSN at the time of parse */ + ib_uint64_t next_parse_lsn; /*!< the LSN of the next unparsed + record in the current parse */ + ib_rbt_t* modified_pages; /*!< the current modified page set, + organized as the RB-tree with the keys + of (space, 4KB-block-start-page-id) + pairs */ + ib_rbt_node_t* page_free_list; /*!< Singly-linked list of freed nodes + of modified_pages tree for later + reuse. Nodes are linked through + ib_rbt_node_t.left as this field has + both the correct type and the tree does + not mind its overwrite during + rbt_next() tree traversal. */ +}; + +/* The log parsing and bitmap output struct instance */ +static struct log_bitmap_struct* log_bmp_sys; + +/* File name stem for modified page bitmaps */ +static const char* modified_page_stem = "ib_modified_log."; + +/* On server startup with empty database srv_start_lsn == 0, in +which case the first LSN of actual log records will be this. */ +#define MIN_TRACKED_LSN ((LOG_START_LSN) + (LOG_BLOCK_HDR_SIZE)) + +/* Tests if num bit of bitmap is set */ +#define IS_BIT_SET(bitmap, num) \ + (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL))) + +/** The bitmap file block size in bytes. All writes will be multiples of this. + */ +enum { + MODIFIED_PAGE_BLOCK_SIZE = 4096 +}; + + +/** Offsets in a file bitmap block */ +enum { + MODIFIED_PAGE_IS_LAST_BLOCK = 0,/* 1 if last block in the current + write, 0 otherwise. */ + MODIFIED_PAGE_START_LSN = 4, /* The starting tracked LSN of this and + other blocks in the same write */ + MODIFIED_PAGE_END_LSN = 12, /* The ending tracked LSN of this and + other blocks in the same write */ + MODIFIED_PAGE_SPACE_ID = 20, /* The space ID of tracked pages in + this block */ + MODIFIED_PAGE_1ST_PAGE_ID = 24, /* The page ID of the first tracked + page in this block */ + MODIFIED_PAGE_BLOCK_UNUSED_1 = 28,/* Unused in order to align the start + of bitmap at 8 byte boundary */ + MODIFIED_PAGE_BLOCK_BITMAP = 32,/* Start of the bitmap itself */ + MODIFIED_PAGE_BLOCK_UNUSED_2 = MODIFIED_PAGE_BLOCK_SIZE - 8, + /* Unused in order to align the end of + bitmap at 8 byte boundary */ + MODIFIED_PAGE_BLOCK_CHECKSUM = MODIFIED_PAGE_BLOCK_SIZE - 4 + /* The checksum of the current block */ +}; + +/** Length of the bitmap data in a block in bytes */ +enum { MODIFIED_PAGE_BLOCK_BITMAP_LEN + = MODIFIED_PAGE_BLOCK_UNUSED_2 - MODIFIED_PAGE_BLOCK_BITMAP }; + +/** Length of the bitmap data in a block in page ids */ +enum { MODIFIED_PAGE_BLOCK_ID_COUNT = MODIFIED_PAGE_BLOCK_BITMAP_LEN * 8 }; + +/****************************************************************//** +Provide a comparisson function for the RB-tree tree (space, +block_start_page) pairs. Actual implementation does not matter as +long as the ordering is full. +@return -1 if p1 < p2, 0 if p1 == p2, 1 if p1 > p2 +*/ +static +int +log_online_compare_bmp_keys( +/*========================*/ + const void* p1, /*!<in: 1st key to compare */ + const void* p2) /*!<in: 2nd key to compare */ +{ + const byte *k1 = (const byte *)p1; + const byte *k2 = (const byte *)p2; + + ulint k1_space = mach_read_from_4(k1 + MODIFIED_PAGE_SPACE_ID); + ulint k2_space = mach_read_from_4(k2 + MODIFIED_PAGE_SPACE_ID); + if (k1_space == k2_space) { + ulint k1_start_page + = mach_read_from_4(k1 + MODIFIED_PAGE_1ST_PAGE_ID); + ulint k2_start_page + = mach_read_from_4(k2 + MODIFIED_PAGE_1ST_PAGE_ID); + return k1_start_page < k2_start_page + ? -1 : k1_start_page > k2_start_page ? 1 : 0; + } + return k1_space < k2_space ? -1 : 1; +} + +/****************************************************************//** +Set a bit for tracked page in the bitmap. Expand the bitmap tree as +necessary. */ +static +void +log_online_set_page_bit( +/*====================*/ + ulint space, /*!<in: log record space id */ + ulint page_no)/*!<in: log record page id */ +{ + ulint block_start_page; + ulint block_pos; + uint bit_pos; + ib_rbt_bound_t tree_search_pos; + byte search_page[MODIFIED_PAGE_BLOCK_SIZE]; + byte *page_ptr; + + ut_a(space != ULINT_UNDEFINED); + ut_a(page_no != ULINT_UNDEFINED); + + block_start_page = page_no / MODIFIED_PAGE_BLOCK_ID_COUNT + * MODIFIED_PAGE_BLOCK_ID_COUNT; + block_pos = block_start_page ? (page_no % block_start_page / 8) + : (page_no / 8); + bit_pos = page_no % 8; + + mach_write_to_4(search_page + MODIFIED_PAGE_SPACE_ID, space); + mach_write_to_4(search_page + MODIFIED_PAGE_1ST_PAGE_ID, + block_start_page); + + if (!rbt_search(log_bmp_sys->modified_pages, &tree_search_pos, + search_page)) { + page_ptr = rbt_value(byte, tree_search_pos.last); + } + else { + ib_rbt_node_t *new_node; + + if (log_bmp_sys->page_free_list) { + new_node = log_bmp_sys->page_free_list; + log_bmp_sys->page_free_list = new_node->left; + } + else { + new_node = ut_malloc(SIZEOF_NODE( + log_bmp_sys->modified_pages)); + } + memset(new_node, 0, SIZEOF_NODE(log_bmp_sys->modified_pages)); + + page_ptr = rbt_value(byte, new_node); + mach_write_to_4(page_ptr + MODIFIED_PAGE_SPACE_ID, space); + mach_write_to_4(page_ptr + MODIFIED_PAGE_1ST_PAGE_ID, + block_start_page); + + rbt_add_preallocated_node(log_bmp_sys->modified_pages, + &tree_search_pos, new_node); + } + page_ptr[MODIFIED_PAGE_BLOCK_BITMAP + block_pos] |= (1U << bit_pos); +} + +/****************************************************************//** +Calculate a bitmap block checksum. Algorithm borrowed from +log_block_calc_checksum. +@return checksum */ +UNIV_INLINE +ulint +log_online_calc_checksum( +/*=====================*/ + const byte* block) /*!<in: bitmap block */ +{ + ulint sum; + ulint sh; + ulint i; + + sum = 1; + sh = 0; + + for (i = 0; i < MODIFIED_PAGE_BLOCK_CHECKSUM; i++) { + + ulint b = block[i]; + sum &= 0x7FFFFFFFUL; + sum += b; + sum += b << sh; + sh++; + if (sh > 24) { + sh = 0; + } + } + + return sum; +} + +/****************************************************************//** +Get the last tracked fully LSN from the bitmap file by reading +backwards untile a correct end page is found. Detects incomplete +writes and corrupted data. Sets the start output position for the +written bitmap data. +@return the last fully tracked LSN */ +static +ib_uint64_t +log_online_read_last_tracked_lsn() +/*==============================*/ +{ + byte page[MODIFIED_PAGE_BLOCK_SIZE]; + ib_uint64_t read_offset = log_bmp_sys->out_offset; + /* Initialize these to nonequal values so that file size == 0 case with + zero loop repetitions is handled correctly */ + ulint checksum = 0; + ulint actual_checksum = !checksum; + ibool is_last_page = FALSE; + ib_uint64_t result; + + ut_ad(log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE == 0); + + while (checksum != actual_checksum && read_offset > 0 && !is_last_page) + { + + ulint offset_low, offset_high; + ibool success; + + read_offset -= MODIFIED_PAGE_BLOCK_SIZE; + offset_high = (ulint)(read_offset >> 32); + offset_low = (ulint)(read_offset & 0xFFFFFFFF); + + success = os_file_read(log_bmp_sys->out, page, offset_low, + offset_high, MODIFIED_PAGE_BLOCK_SIZE); + if (!success) { + + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + /* Here and below assume that bitmap file names do not + contain apostrophes, thus no need for + ut_print_filename(). */ + fprintf(stderr, "InnoDB: Warning: failed reading " + "changed page bitmap file \'%s\'\n", + log_bmp_sys->out_name); + return MIN_TRACKED_LSN; + } + + is_last_page + = mach_read_from_4(page + MODIFIED_PAGE_IS_LAST_BLOCK); + checksum = mach_read_from_4(page + + MODIFIED_PAGE_BLOCK_CHECKSUM); + actual_checksum = log_online_calc_checksum(page); + if (checksum != actual_checksum) { + + fprintf(stderr, "InnoDB: Warning: corruption " + "detected in \'%s\' at offset %llu\n", + log_bmp_sys->out_name, read_offset); + } + + }; + + if (UNIV_LIKELY(checksum == actual_checksum && is_last_page)) { + + log_bmp_sys->out_offset = read_offset + + MODIFIED_PAGE_BLOCK_SIZE; + result = mach_read_from_8(page + MODIFIED_PAGE_END_LSN); + } + else { + log_bmp_sys->out_offset = read_offset; + result = 0; + } + + /* Truncate the output file to discard the corrupted bitmap data, if + any */ + if (!os_file_set_eof_at(log_bmp_sys->out, + log_bmp_sys->out_offset)) { + fprintf(stderr, "InnoDB: Warning: failed truncating " + "changed page bitmap file \'%s\' to %llu bytes\n", + log_bmp_sys->out_name, log_bmp_sys->out_offset); + result = 0; + } + return result; +} + +/****************************************************************//** +Safely write the log_sys->tracked_lsn value. Uses atomic operations +if available, otherwise this field is protected with the log system +mutex. The reader counterpart function is log_get_tracked_lsn() in +log0log.c. */ +UNIV_INLINE +void +log_set_tracked_lsn( +/*================*/ + ib_uint64_t tracked_lsn) /*!<in: new value */ +{ +#ifdef HAVE_ATOMIC_BUILTINS_64 + /* Single writer, no data race here */ + ib_uint64_t old_value + = os_atomic_increment_uint64(&log_sys->tracked_lsn, 0); + (void) os_atomic_increment_uint64(&log_sys->tracked_lsn, + tracked_lsn - old_value); +#else + mutex_enter(&log_sys->mutex); + log_sys->tracked_lsn = tracked_lsn; + mutex_exit(&log_sys->mutex); +#endif +} + +/****************************************************************//** +Diagnose a gap in tracked LSN range on server startup due to crash or +very fast shutdown and try to close it by tracking the data +immediatelly, if possible. */ +static +void +log_online_track_missing_on_startup( +/*================================*/ + ib_uint64_t last_tracked_lsn, /*!<in: last tracked LSN read + from the bitmap file */ + ib_uint64_t tracking_start_lsn) /*!<in: last checkpoint LSN of + the current server startup */ +{ + ut_ad(last_tracked_lsn != tracking_start_lsn); + + fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' is %llu, but " + "last checkpoint LSN is %llu. This might be due to a server " + "crash or a very fast shutdown. ", log_bmp_sys->out_name, + last_tracked_lsn, tracking_start_lsn); + + /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty + bitmap file, handle this too. */ + last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN); + + /* See if we can fully recover the missing interval */ + if (log_sys->lsn - last_tracked_lsn < log_sys->log_group_capacity) { + + fprintf(stderr, + "Reading the log to advance the last tracked LSN.\n"); + + log_bmp_sys->start_lsn = last_tracked_lsn; + log_set_tracked_lsn(log_bmp_sys->start_lsn); + log_online_follow_redo_log(); + ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn); + + fprintf(stderr, + "InnoDB: continuing tracking changed pages from LSN " + "%llu\n", log_bmp_sys->end_lsn); + } + else { + fprintf(stderr, + "The age of last tracked LSN exceeds log capacity, " + "tracking-based incremental backups will work only " + "from the higher LSN!\n"); + + log_bmp_sys->end_lsn = log_bmp_sys->start_lsn + = tracking_start_lsn; + log_set_tracked_lsn(log_bmp_sys->start_lsn); + + fprintf(stderr, + "InnoDB: starting tracking changed pages from LSN " + "%llu\n", log_bmp_sys->end_lsn); + } +} + +/*********************************************************************//** +Initialize the online log following subsytem. */ +UNIV_INTERN +void +log_online_read_init() +/*==================*/ +{ + char buf[FN_REFLEN]; + ibool success; + ib_uint64_t tracking_start_lsn + = ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN); + + /* Assert (could be compile-time assert) that bitmap data start and end + in a bitmap block is 8-byte aligned */ + ut_a(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0); + ut_a(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0); + + log_bmp_sys = ut_malloc(sizeof(*log_bmp_sys)); + + ut_snprintf(buf, FN_REFLEN, "%s%s%d", srv_data_home, + modified_page_stem, 1); + log_bmp_sys->out_name = ut_malloc(strlen(buf) + 1); + ut_strcpy(log_bmp_sys->out_name, buf); + + log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE, + log_online_compare_bmp_keys); + log_bmp_sys->page_free_list = NULL; + + log_bmp_sys->out + = os_file_create_simple_no_error_handling + (innodb_file_bmp_key, log_bmp_sys->out_name, OS_FILE_OPEN, + OS_FILE_READ_WRITE, &success); + + if (!success) { + + /* New file, tracking from scratch */ + log_bmp_sys->out + = os_file_create_simple_no_error_handling + (innodb_file_bmp_key, log_bmp_sys->out_name, + OS_FILE_CREATE, OS_FILE_READ_WRITE, &success); + if (!success) { + + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + fprintf(stderr, + "InnoDB: Error: Cannot create \'%s\'\n", + log_bmp_sys->out_name); + exit(1); + } + + log_bmp_sys->out_offset = 0; + } + else { + + /* Old file, read last tracked LSN and continue from there */ + ulint size_low; + ulint size_high; + ib_uint64_t last_tracked_lsn; + + success = os_file_get_size(log_bmp_sys->out, &size_low, + &size_high); + ut_a(success); + + log_bmp_sys->out_offset + = ((ib_uint64_t)size_high << 32) | size_low; + + if (log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE != 0) { + + fprintf(stderr, + "InnoDB: Warning: truncated block detected " + "in \'%s\' at offset %llu\n", + log_bmp_sys->out_name, + log_bmp_sys->out_offset); + log_bmp_sys->out_offset -= + log_bmp_sys->out_offset + % MODIFIED_PAGE_BLOCK_SIZE; + } + + last_tracked_lsn = log_online_read_last_tracked_lsn(); + + if (last_tracked_lsn < tracking_start_lsn) { + + log_online_track_missing_on_startup(last_tracked_lsn, + tracking_start_lsn); + return; + } + + if (last_tracked_lsn > tracking_start_lsn) { + + fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' " + "is %llu, but last checkpoint LSN is %llu. " + "The tracking-based incremental backups will " + "work only from the latter LSN!\n", + log_bmp_sys->out_name, last_tracked_lsn, + tracking_start_lsn); + } + + } + + fprintf(stderr, "InnoDB: starting tracking changed pages from " + "LSN %llu\n", tracking_start_lsn); + log_bmp_sys->start_lsn = tracking_start_lsn; + log_set_tracked_lsn(tracking_start_lsn); +} + +/*********************************************************************//** +Shut down the online log following subsystem. */ +UNIV_INTERN +void +log_online_read_shutdown() +/*======================*/ +{ + ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list; + + os_file_close(log_bmp_sys->out); + + rbt_free(log_bmp_sys->modified_pages); + + while (free_list_node) { + ib_rbt_node_t *next = free_list_node->left; + ut_free(free_list_node); + free_list_node = next; + } + + ut_free(log_bmp_sys->out_name); + ut_free(log_bmp_sys); +} + +/*********************************************************************//** +For the given minilog record type determine if the record has (space; page) +associated with it. +@return TRUE if the record has (space; page) in it */ +static +ibool +log_online_rec_has_page( +/*====================*/ + byte type) /*!<in: the minilog record type */ +{ + return type != MLOG_MULTI_REC_END && type != MLOG_DUMMY_RECORD; +} + +/*********************************************************************//** +Check if a page field for a given log record type actually contains a page +id. It does not for file operations and MLOG_LSN. +@return TRUE if page field contains actual page id, FALSE otherwise */ +static +ibool +log_online_rec_page_means_page( +/*===========================*/ + byte type) /*!<in: log record type */ +{ + return log_online_rec_has_page(type) +#ifdef UNIV_LOG_LSN_DEBUG + && type != MLOG_LSN +#endif + && type != MLOG_FILE_CREATE + && type != MLOG_FILE_RENAME + && type != MLOG_FILE_DELETE + && type != MLOG_FILE_CREATE2; +} + +/*********************************************************************//** +Parse the log data in the parse buffer for the (space, page) pairs and add +them to the modified page set as necessary. Removes the fully-parsed records +from the buffer. If an incomplete record is found, moves it to the end of the +buffer. */ +static +void +log_online_parse_redo_log() +/*=======================*/ +{ + byte *ptr = log_bmp_sys->parse_buf; + byte *end = log_bmp_sys->parse_buf_end; + + ulint len = 0; + + while (ptr != end + && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) { + + byte type; + ulint space; + ulint page_no; + byte* body; + + /* recv_sys is not initialized, so on corrupt log we will + SIGSEGV. But the log of a live database should not be + corrupt. */ + len = recv_parse_log_rec(ptr, end, &type, &space, &page_no, + &body); + if (len > 0) { + + if (log_online_rec_page_means_page(type) + && (space != TRX_DOUBLEWRITE_SPACE)) { + + ut_a(len >= 3); + log_online_set_page_bit(space, page_no); + } + + ptr += len; + ut_ad(ptr <= end); + log_bmp_sys->next_parse_lsn + = recv_calc_lsn_on_data_add + (log_bmp_sys->next_parse_lsn, len); + } + else { + + /* Incomplete log record. Shift it to the + beginning of the parse buffer and leave it to be + completed on the next read. */ + ut_memmove(log_bmp_sys->parse_buf, ptr, end - ptr); + log_bmp_sys->parse_buf_end + = log_bmp_sys->parse_buf + (end - ptr); + ptr = end; + } + } + + if (len > 0) { + + log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf; + } +} + +/*********************************************************************//** +Check the log block checksum. +@return TRUE if the log block checksum is OK, FALSE otherwise. */ +static +ibool +log_online_is_valid_log_seg( +/*========================*/ + const byte* log_block) /*!< in: read log data */ +{ + ibool checksum_is_ok + = log_block_checksum_is_ok_or_old_format(log_block); + + if (!checksum_is_ok) { + + fprintf(stderr, + "InnoDB Error: log block checksum mismatch" + "expected %lu, calculated checksum %lu\n", + (ulong) log_block_get_checksum(log_block), + (ulong) log_block_calc_checksum(log_block)); + } + + return checksum_is_ok; +} + +/*********************************************************************//** +Copy new log data to the parse buffer while skipping log block header, +trailer and already parsed data. */ +static +void +log_online_add_to_parse_buf( +/*========================*/ + const byte* log_block, /*!< in: read log data */ + ulint data_len, /*!< in: length of read log data */ + ulint skip_len) /*!< in: how much of log data to + skip */ +{ + ulint start_offset = skip_len ? skip_len : LOG_BLOCK_HDR_SIZE; + ulint end_offset + = (data_len == OS_FILE_LOG_BLOCK_SIZE) + ? data_len - LOG_BLOCK_TRL_SIZE + : data_len; + ulint actual_data_len = (end_offset >= start_offset) + ? end_offset - start_offset : 0; + + ut_memcpy(log_bmp_sys->parse_buf_end, log_block + start_offset, + actual_data_len); + + log_bmp_sys->parse_buf_end += actual_data_len; + + ut_a(log_bmp_sys->parse_buf_end - log_bmp_sys->parse_buf + <= RECV_PARSING_BUF_SIZE); +} + +/*********************************************************************//** +Parse the log block: first copies the read log data to the parse buffer while +skipping log block header, trailer and already parsed data. Then it actually +parses the log to add to the modified page bitmap. */ +static +void +log_online_parse_redo_log_block( +/*============================*/ + const byte* log_block, /*!< in: read log data */ + ulint skip_already_parsed_len) /*!< in: how many bytes of + log data should be skipped as + they were parsed before */ +{ + ulint block_data_len; + + block_data_len = log_block_get_data_len(log_block); + + ut_ad(block_data_len % OS_FILE_LOG_BLOCK_SIZE == 0 + || block_data_len < OS_FILE_LOG_BLOCK_SIZE); + + log_online_add_to_parse_buf(log_block, block_data_len, + skip_already_parsed_len); + log_online_parse_redo_log(); +} + +/*********************************************************************//** +Read and parse one redo log chunk and updates the modified page bitmap. */ +static +void +log_online_follow_log_seg( +/*======================*/ + log_group_t* group, /*!< in: the log group to use */ + ib_uint64_t block_start_lsn, /*!< in: the LSN to read from */ + ib_uint64_t block_end_lsn) /*!< in: the LSN to read to */ +{ + /* Pointer to the current OS_FILE_LOG_BLOCK-sized chunk of the read log + data to parse */ + byte* log_block = log_bmp_sys->read_buf; + byte* log_block_end = log_bmp_sys->read_buf + + (block_end_lsn - block_start_lsn); + + mutex_enter(&log_sys->mutex); + log_group_read_log_seg(LOG_RECOVER, log_bmp_sys->read_buf, + group, block_start_lsn, block_end_lsn); + mutex_exit(&log_sys->mutex); + + while (log_block < log_block_end + && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) { + + /* How many bytes of log data should we skip in the current log + block. Skipping is necessary because we round down the next + parse LSN thus it is possible to read the already-processed log + data many times */ + ulint skip_already_parsed_len = 0; + + if (!log_online_is_valid_log_seg(log_block)) { + break; + } + + if ((block_start_lsn <= log_bmp_sys->next_parse_lsn) + && (block_start_lsn + OS_FILE_LOG_BLOCK_SIZE + > log_bmp_sys->next_parse_lsn)) { + + /* The next parse LSN is inside the current block, skip + data preceding it. */ + skip_already_parsed_len + = log_bmp_sys->next_parse_lsn + - block_start_lsn; + } + else { + + /* If the next parse LSN is not inside the current + block, then the only option is that we have processed + ahead already. */ + ut_a(block_start_lsn > log_bmp_sys->next_parse_lsn); + } + + /* TODO: merge the copying to the parse buf code with + skip_already_len calculations */ + log_online_parse_redo_log_block(log_block, + skip_already_parsed_len); + + log_block += OS_FILE_LOG_BLOCK_SIZE; + block_start_lsn += OS_FILE_LOG_BLOCK_SIZE; + } + + return; +} + +/*********************************************************************//** +Read and parse the redo log in a given group in FOLLOW_SCAN_SIZE-sized +chunks and updates the modified page bitmap. */ +static +void +log_online_follow_log_group( +/*========================*/ + log_group_t* group, /*!< in: the log group to use */ + ib_uint64_t contiguous_lsn) /*!< in: the LSN of log block start + containing the log_parse_start_lsn */ +{ + ib_uint64_t block_start_lsn = contiguous_lsn; + ib_uint64_t block_end_lsn; + + log_bmp_sys->next_parse_lsn = log_bmp_sys->start_lsn; + log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf; + + do { + block_end_lsn = block_start_lsn + FOLLOW_SCAN_SIZE; + + log_online_follow_log_seg(group, block_start_lsn, + block_end_lsn); + + /* Next parse LSN can become higher than the last read LSN + only in the case when the read LSN falls right on the block + boundary, in which case next parse lsn is bumped to the actual + data LSN on the next (not yet read) block. This assert is + slightly conservative. */ + ut_a(log_bmp_sys->next_parse_lsn + <= block_end_lsn + LOG_BLOCK_HDR_SIZE + + LOG_BLOCK_TRL_SIZE); + + block_start_lsn = block_end_lsn; + } while (block_end_lsn < log_bmp_sys->end_lsn); + + /* Assert that the last read log record is a full one */ + ut_a(log_bmp_sys->parse_buf_end == log_bmp_sys->parse_buf); +} + +/*********************************************************************//** +Write, flush one bitmap block to disk and advance the output position if +successful. */ +static +void +log_online_write_bitmap_page( +/*=========================*/ + const byte *block) /*!< in: block to write */ +{ + ibool success; + + success = os_file_write(log_bmp_sys->out_name,log_bmp_sys->out, + block, + (ulint)(log_bmp_sys->out_offset & 0xFFFFFFFF), + (ulint)(log_bmp_sys->out_offset << 32), + MODIFIED_PAGE_BLOCK_SIZE); + if (UNIV_UNLIKELY(!success)) { + + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + fprintf(stderr, "InnoDB: Error: failed writing changed page " + "bitmap file \'%s\'\n", log_bmp_sys->out_name); + return; + } + + success = os_file_flush(log_bmp_sys->out, FALSE); + if (UNIV_UNLIKELY(!success)) { + + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + fprintf(stderr, "InnoDB: Error: failed flushing " + "changed page bitmap file \'%s\'\n", + log_bmp_sys->out_name); + return; + } + + log_bmp_sys->out_offset += MODIFIED_PAGE_BLOCK_SIZE; +} + +/*********************************************************************//** +Append the current changed page bitmap to the bitmap file. Clears the +bitmap tree and recycles its nodes to the free list. */ +static +void +log_online_write_bitmap() +/*=====================*/ +{ + ib_rbt_node_t *bmp_tree_node; + const ib_rbt_node_t *last_bmp_tree_node; + + bmp_tree_node = (ib_rbt_node_t *) + rbt_first(log_bmp_sys->modified_pages); + last_bmp_tree_node = rbt_last(log_bmp_sys->modified_pages); + + while (bmp_tree_node) { + + byte *page = rbt_value(byte, bmp_tree_node); + + if (bmp_tree_node == last_bmp_tree_node) { + mach_write_to_4(page + MODIFIED_PAGE_IS_LAST_BLOCK, 1); + } + + mach_write_to_8(page + MODIFIED_PAGE_START_LSN, + log_bmp_sys->start_lsn); + mach_write_to_8(page + MODIFIED_PAGE_END_LSN, + log_bmp_sys->end_lsn); + mach_write_to_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM, + log_online_calc_checksum(page)); + + log_online_write_bitmap_page(page); + + bmp_tree_node->left = log_bmp_sys->page_free_list; + log_bmp_sys->page_free_list = bmp_tree_node; + + bmp_tree_node = (ib_rbt_node_t*) + rbt_next(log_bmp_sys->modified_pages, bmp_tree_node); + } + + rbt_reset(log_bmp_sys->modified_pages); +} + +/*********************************************************************//** +Read and parse the redo log up to last checkpoint LSN to build the changed +page bitmap which is then written to disk. */ +UNIV_INTERN +void +log_online_follow_redo_log() +/*========================*/ +{ + ib_uint64_t contiguous_start_lsn; + log_group_t* group; + + /* Grab the LSN of the last checkpoint, we will parse up to it */ + mutex_enter(&(log_sys->mutex)); + log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn; + mutex_exit(&(log_sys->mutex)); + + if (log_bmp_sys->end_lsn == log_bmp_sys->start_lsn) { + return; + } + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + ut_a(group); + + contiguous_start_lsn = ut_uint64_align_down(log_bmp_sys->start_lsn, + OS_FILE_LOG_BLOCK_SIZE); + + while (group) { + log_online_follow_log_group(group, contiguous_start_lsn); + group = UT_LIST_GET_NEXT(log_groups, group); + } + + /* A crash injection site that ensures last checkpoint LSN > last + tracked LSN, so that LSN tracking for this interval is tested. */ + DBUG_EXECUTE_IF("crash_before_bitmap_write", DBUG_SUICIDE();); + + log_online_write_bitmap(); + log_bmp_sys->start_lsn = log_bmp_sys->end_lsn; + log_set_tracked_lsn(log_bmp_sys->start_lsn); +} + +/*********************************************************************//** +Initializes log bitmap iterator. +@return TRUE if the iterator is initialized OK, FALSE otherwise. */ +UNIV_INTERN +ibool +log_online_bitmap_iterator_init( +/*============================*/ + log_bitmap_iterator_t *i) /*!<in/out: iterator */ +{ + ibool success; + + ut_a(i); + ut_snprintf(i->in_name, FN_REFLEN, "%s%s%d", srv_data_home, + modified_page_stem, 1); + i->in_offset = 0; + /* + Set up bit offset out of the reasonable limit + to intiate reading block from file in + log_online_bitmap_iterator_next() + */ + i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN; + i->in = + os_file_create_simple_no_error_handling(innodb_file_bmp_key, + i->in_name, + OS_FILE_OPEN, + OS_FILE_READ_ONLY, + &success); + + if (!success) { + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + fprintf(stderr, + "InnoDB: Error: Cannot open \'%s\'\n", + i->in_name); + return FALSE; + } + + i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE); + + i->start_lsn = i->end_lsn = 0; + i->space_id = 0; + i->first_page_id = 0; + i->changed = FALSE; + + return TRUE; +} + +/*********************************************************************//** +Releases log bitmap iterator. */ +UNIV_INTERN +void +log_online_bitmap_iterator_release( +/*===============================*/ + log_bitmap_iterator_t *i) /*!<in/out: iterator */ +{ + ut_a(i); + os_file_close(i->in); + ut_free(i->page); +} + +/*********************************************************************//** +Iterates through bits of saved bitmap blocks. +Sequentially reads blocks from bitmap file(s) and interates through +their bits. Ignores blocks with wrong checksum. +@return TRUE if iteration is successful, FALSE if all bits are iterated. */ +UNIV_INTERN +ibool +log_online_bitmap_iterator_next( +/*============================*/ + log_bitmap_iterator_t *i) /*!<in/out: iterator */ +{ + ulint offset_low; + ulint offset_high; + ulint size_low; + ulint size_high; + ulint checksum = 0; + ulint actual_checksum = !checksum; + + ibool success; + + ut_a(i); + + if (i->bit_offset < MODIFIED_PAGE_BLOCK_BITMAP_LEN) + { + ++i->bit_offset; + i->changed = + IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP, + i->bit_offset); + return TRUE; + } + + while (checksum != actual_checksum) + { + success = os_file_get_size(i->in, + &size_low, + &size_high); + if (!success) { + os_file_get_last_error(TRUE); + fprintf(stderr, + "InnoDB: Warning: can't get size of " + "page bitmap file \'%s\'\n", + i->in_name); + return FALSE; + } + + if (i->in_offset >= + (ib_uint64_t)(size_low) + + ((ib_uint64_t)(size_high) << 32)) + return FALSE; + + offset_high = (ulint)(i->in_offset >> 32); + offset_low = (ulint)(i->in_offset & 0xFFFFFFFF); + + success = os_file_read( + i->in, + i->page, + offset_low, + offset_high, + MODIFIED_PAGE_BLOCK_SIZE); + + if (!success) { + os_file_get_last_error(TRUE); + fprintf(stderr, + "InnoDB: Warning: failed reading " + "changed page bitmap file \'%s\'\n", + i->in_name); + return FALSE; + } + + checksum = mach_read_from_4( + i->page + MODIFIED_PAGE_BLOCK_CHECKSUM); + + actual_checksum = log_online_calc_checksum(i->page); + + i->in_offset += MODIFIED_PAGE_BLOCK_SIZE; + } + + i->start_lsn = + mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN); + i->end_lsn = + mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN); + i->space_id = + mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID); + i->first_page_id = + mach_read_from_4(i->page + MODIFIED_PAGE_1ST_PAGE_ID); + i->bit_offset = + 0; + i->changed = + IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP, + i->bit_offset); + + return TRUE; +} + |